MALI: drm/arm: Update midgard kernel driver to r20

This updates the Midgard kernel driver to work with the latest Mali
binary we received (r20).

Note:
On its own, this patch breaks the build; a follow-up patch is
required to update the platform driver.

Signed-off-by: Alexandre Bailon <abailon@baylibre.com>
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
index 3e1c915..4d0b557 100644
--- a/drivers/gpu/arm/midgard/Kbuild
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -21,7 +21,7 @@
 
 
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r14p0-01rel0"
+MALI_RELEASE_NAME ?= "r20p0-01rel0"
 
 # Paths required for build
 KBASE_PATH = $(src)
@@ -33,7 +33,6 @@
 MALI_USE_CSF ?= 0
 MALI_UNIT_TEST ?= 0
 MALI_KERNEL_TEST_API ?= 0
-MALI_MOCK_TEST ?= 0
 MALI_COVERAGE ?= 0
 CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
 
@@ -43,7 +42,6 @@
 	-DMALI_USE_CSF=$(MALI_USE_CSF) \
 	-DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
 	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
-	-DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
 	-DMALI_COVERAGE=$(MALI_COVERAGE) \
 	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\"
 
@@ -61,13 +59,15 @@
 
 # Use our defines when compiling
 ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
-subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH)   -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
 
 SRC := \
 	mali_kbase_device.c \
 	mali_kbase_cache_policy.c \
 	mali_kbase_mem.c \
+	mali_kbase_mem_pool_group.c \
 	mali_kbase_mmu.c \
+	mali_kbase_native_mgm.c \
 	mali_kbase_ctx_sched.c \
 	mali_kbase_jd.c \
 	mali_kbase_jd_debugfs.c \
@@ -80,38 +80,45 @@
 	mali_kbase_pm.c \
 	mali_kbase_config.c \
 	mali_kbase_vinstr.c \
+	mali_kbase_hwcnt.c \
+	mali_kbase_hwcnt_backend_gpu.c \
+	mali_kbase_hwcnt_gpu.c \
+	mali_kbase_hwcnt_legacy.c \
+	mali_kbase_hwcnt_types.c \
+	mali_kbase_hwcnt_virtualizer.c \
 	mali_kbase_softjobs.c \
 	mali_kbase_10969_workaround.c \
 	mali_kbase_hw.c \
-	mali_kbase_utility.c \
 	mali_kbase_debug.c \
 	mali_kbase_gpu_memory_debugfs.c \
 	mali_kbase_mem_linux.c \
 	mali_kbase_core_linux.c \
-	mali_kbase_replay.c \
 	mali_kbase_mem_profile_debugfs.c \
 	mali_kbase_mmu_mode_lpae.c \
 	mali_kbase_mmu_mode_aarch64.c \
 	mali_kbase_disjoint_events.c \
-	mali_kbase_gator_api.c \
 	mali_kbase_debug_mem_view.c \
 	mali_kbase_debug_job_fault.c \
 	mali_kbase_smc.c \
 	mali_kbase_mem_pool.c \
 	mali_kbase_mem_pool_debugfs.c \
+	mali_kbase_debugfs_helper.c \
+	mali_kbase_timeline.c \
+	mali_kbase_timeline_io.c \
 	mali_kbase_tlstream.c \
+	mali_kbase_tracepoints.c \
 	mali_kbase_strings.c \
 	mali_kbase_as_fault_debugfs.c \
 	mali_kbase_regs_history_debugfs.c \
 	thirdparty/mali_kbase_mmap.c
 
 
-ifeq ($(CONFIG_MALI_JOB_DUMP),y)
+ifeq ($(CONFIG_MALI_CINSTR_GWT),y)
 	SRC += mali_kbase_gwt.c
 endif
 
 ifeq ($(MALI_UNIT_TEST),1)
-	SRC += mali_kbase_tlstream_test.c
+	SRC += mali_kbase_timeline_test.c
 endif
 
 ifeq ($(MALI_CUSTOMER_RELEASE),0)
@@ -154,11 +161,6 @@
 	mali_kbase_sync_common.o \
 	mali_kbase_fence.o
 
-ifeq ($(MALI_MOCK_TEST),1)
-# Test functionality
-mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
-endif
-
 include  $(src)/backend/gpu/Kbuild
 mali_kbase-y += $(BACKEND:.c=.o)
 
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
index 6273f33..ca3e29c 100644
--- a/drivers/gpu/arm/midgard/Kconfig
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -22,9 +22,8 @@
 
 menuconfig MALI_MIDGARD
 	tristate "Mali Midgard series support"
-	depends on ARM || ARM64
-	depends on THERMAL || THERMAL=n
 	select GPU_TRACEPOINTS if ANDROID
+	select DMA_SHARED_BUFFER
 	default n
 	help
 	  Enable this option to build support for a ARM Mali Midgard GPU.
@@ -33,19 +32,16 @@
 	  this will generate a single module, called mali_kbase.
 
 config MALI_GATOR_SUPPORT
-	bool "Streamline support via Gator"
+	bool "Enable Streamline tracing support"
 	depends on MALI_MIDGARD
-	default n
+	default y
 	help
-	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
-	  You will need the Gator device driver already loaded before loading this driver when enabling
-	  Streamline debug support.
-	  This is a legacy interface required by older versions of Streamline.
+	  Enables kbase tracing used by the Arm Streamline Performance Analyzer.
+	  The tracepoints are used to derive GPU activity charts in Streamline.
 
 config MALI_MIDGARD_DVFS
 	bool "Enable legacy DVFS"
 	depends on MALI_MIDGARD && !MALI_DEVFREQ
-	depends on BROKEN
 	default n
 	help
 	  Choose this option to enable legacy DVFS in the Mali Midgard DDK.
@@ -111,6 +107,20 @@
 
 	  If unsure, say N.
 
+config MALI_PLATFORM_POWER_DOWN_ONLY
+	bool "Support disabling the power down of individual cores"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature makes the driver avoid powering down the
+	  shader cores, the tiler, and the L2 cache individually.
+	  The entire GPU is instead powered down at once by the
+	  platform-specific code.
+	  This may be required only for certain platform configurations.
+	  This also limits the available power policies.
+
+	  If unsure, say N.
+
 config MALI_DEBUG
 	bool "Debug build"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -138,7 +148,6 @@
 config MALI_NO_MALI
 	bool "No Mali"
 	depends on MALI_MIDGARD && MALI_EXPERT
-	depends on BROKEN
 	default n
 	help
 	  This can be used to test the driver in a simulated environment
@@ -148,6 +157,9 @@
 	  All calls to the simulated hardware will complete immediately as if the hardware
 	  completed the task.
 
+config MALI_REAL_HW
+	def_bool !MALI_NO_MALI
+
 config MALI_ERROR_INJECT
 	bool "Error injection"
 	depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
@@ -158,7 +170,6 @@
 config MALI_SYSTEM_TRACE
 	bool "Enable system event tracing support"
 	depends on MALI_MIDGARD && MALI_EXPERT
-	depends on BROKEN
 	default n
 	help
 	  Choose this option to enable system trace events for each
@@ -189,6 +200,60 @@
 	  If using kernel >= v4.10 then say N, otherwise if devfreq cooling
 	  changes have been backported say Y to avoid compilation errors.
 
+config MALI_MEMORY_FULLY_BACKED
+	bool "Memory fully physically-backed"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This option enables full physical backing of all virtual
+	  memory allocations in the kernel. Notice that this build
+	  option only affects allocations of grow-on-GPU-page-fault
+	  memory.
+
+config MALI_DMA_BUF_MAP_ON_DEMAND
+	bool "Map imported dma-bufs on demand"
+	depends on MALI_MIDGARD
+	default n
+	help
+	  This option causes kbase to set up the GPU mapping of an imported
+	  dma-buf only when needed to run atoms. This is the legacy behaviour.
+
+	  This is intended for testing and the option will get removed in the
+	  future.
+
+config MALI_DMA_BUF_LEGACY_COMPAT
+	bool "Enable legacy compatibility cache flush on dma-buf map"
+	depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND
+	default y
+	help
+	  This option enables compatibility with the legacy dma-buf mapping
+	  behaviour, where the dma-buf is mapped on import, by adding cache
+	  maintenance, including a cache flush, at the points where
+	  MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping.
+
+config MALI_HW_ERRATA_1485982_NOT_AFFECTED
+	bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  This option disables the default workaround for GPU2017-1336. The
+	  workaround keeps the L2 cache powered up except for powerdown and reset.
+
+	  The workaround introduces a limitation that will prevent the running of
+	  protected mode content on fully coherent platforms, as the switch to IO
+	  coherency mode requires the L2 to be turned off.
+
+config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
+	bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336"
+	depends on MALI_MIDGARD && MALI_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED
+	default n
+	help
+	  This option uses an alternative workaround for GPU2017-1336: the GPU
+	  clock is lowered to a platform-specific, known-good frequency before
+	  powering down the L2 cache. The frequency can be specified in the
+	  device tree using the opp-mali-errata-1485982 property; otherwise the
+	  slowest clock will be selected.
+
 # Instrumentation options.
 
 config MALI_JOB_DUMP
@@ -215,3 +280,4 @@
 	  If unsure, say N.
 
 source "drivers/gpu/arm/midgard/platform/Kconfig"
+source "drivers/gpu/arm/midgard/tests/Kconfig"
diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile
index 13af9f4..53a12094 100644
--- a/drivers/gpu/arm/midgard/Makefile
+++ b/drivers/gpu/arm/midgard/Makefile
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -25,11 +25,7 @@
 BUSLOG_PATH_RELATIVE = $(CURDIR)/../../../..
 KBASE_PATH_RELATIVE = $(CURDIR)
 
-ifeq ($(MALI_UNIT_TEST), 1)
-	EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
-endif
-
-ifeq ($(CONFIG_MALI_FPGA_BUS_LOGGER),y)
+ifeq ($(CONFIG_MALI_BUSLOG),y)
 #Add bus logger symbols
 EXTRA_SYMBOLS += $(BUSLOG_PATH_RELATIVE)/drivers/base/bus_logger/Module.symvers
 endif
diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase
index d7898cb..6b0f81e 100644
--- a/drivers/gpu/arm/midgard/Makefile.kbase
+++ b/drivers/gpu/arm/midgard/Makefile.kbase
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010, 2013, 2018 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -19,5 +19,5 @@
 #
 #
 
-EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(KBASE_PATH)/platform_$(PLATFORM)
 
diff --git a/drivers/gpu/arm/midgard/Mconfig b/drivers/gpu/arm/midgard/Mconfig
index 583dec3..27e0d63 100644
--- a/drivers/gpu/arm/midgard/Mconfig
+++ b/drivers/gpu/arm/midgard/Mconfig
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -23,15 +23,12 @@
 	  this will generate a single module, called mali_kbase.
 
 config MALI_GATOR_SUPPORT
-	bool "Streamline support via Gator"
+	bool "Enable Streamline tracing support"
 	depends on MALI_MIDGARD && !BACKEND_USER
-	default y if INSTRUMENTATION_STREAMLINE_OLD
-	default n
+	default y
 	help
-	  Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
-	  You will need the Gator device driver already loaded before loading this driver when enabling
-	  Streamline debug support.
-	  This is a legacy interface required by older versions of Streamline.
+	  Enables kbase tracing used by the Arm Streamline Performance Analyzer.
+	  The tracepoints are used to derive GPU activity charts in Streamline.
 
 config MALI_MIDGARD_DVFS
 	bool "Enable legacy DVFS"
@@ -73,11 +70,8 @@
 config MALI_PLATFORM_NAME
 	depends on MALI_MIDGARD
 	string "Platform name"
-	default "arndale" if PLATFORM_ARNDALE
-	default "arndale_octa" if PLATFORM_ARNDALE_OCTA
-	default "rk" if PLATFORM_FIREFLY
 	default "hisilicon" if PLATFORM_HIKEY960
-	default "vexpress" if PLATFORM_VEXPRESS
+	default "hisilicon" if PLATFORM_HIKEY970
 	default "devicetree"
 	help
 	  Enter the name of the desired platform configuration directory to
@@ -87,11 +81,6 @@
 	  When PLATFORM_CUSTOM is set, this needs to be set manually to
 	  pick up the desired platform files.
 
-config MALI_MOCK_TEST
-	bool
-	depends on MALI_MIDGARD && !RELEASE
-	default y
-
 # MALI_EXPERT configuration options
 
 menuconfig MALI_EXPERT
@@ -115,6 +104,20 @@
 
 	  If unsure, say N.
 
+config MALI_PLATFORM_POWER_DOWN_ONLY
+	bool "Support disabling the power down of individual cores"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	help
+	  Enabling this feature makes the driver avoid powering down the
+	  shader cores, the tiler, and the L2 cache individually.
+	  The entire GPU is instead powered down at once by the
+	  platform-specific code.
+	  This may be required only for certain platform configurations.
+	  This also limits the available power policies.
+
+	  If unsure, say N.
+
 config MALI_DEBUG
 	bool "Debug build"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -140,20 +143,41 @@
 	  The timeout can be changed at runtime through the js_soft_timeout
 	  device attribute, where the timeout is specified in milliseconds.
 
-config MALI_ERROR_INJECT
-	bool "Error injection"
-	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
-	default n
+choice
+	prompt "Error injection level"
+	default MALI_ERROR_INJECT_NONE
 	help
 	  Enables insertion of errors to test module failure and recovery mechanisms.
 
+config MALI_ERROR_INJECT_NONE
+	bool "disabled"
+	help
+	  Error injection is disabled.
+
+config MALI_ERROR_INJECT_TRACK_LIST
+	bool "error track list"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
+	help
+	  Errors to inject are pre-configured by the user.
+
 config MALI_ERROR_INJECT_RANDOM
-	bool "Random error injection"
-	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI && MALI_ERROR_INJECT
-	default n
+	bool "random error injection"
+	depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
 	help
 	  Injected errors are random, rather than user-driven.
 
+endchoice
+
+config MALI_ERROR_INJECT_ON
+	string
+	default "0" if MALI_ERROR_INJECT_NONE
+	default "1" if MALI_ERROR_INJECT_TRACK_LIST
+	default "2" if MALI_ERROR_INJECT_RANDOM
+
+config MALI_ERROR_INJECT
+	bool
+	default y if !MALI_ERROR_INJECT_NONE
+
 config MALI_SYSTEM_TRACE
 	bool "Enable system event tracing support"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -175,11 +199,6 @@
 
 	  If in doubt, say N
 
-config MALI_FPGA_BUS_LOGGER
-	bool "Enable bus log integration"
-	depends on MALI_MIDGARD && MALI_EXPERT
-	default n
-
 config MALI_PWRSOFT_765
 	bool "PWRSOFT-765 ticket"
 	depends on MALI_MIDGARD && MALI_EXPERT
@@ -189,6 +208,61 @@
 	  not merged in mainline kernel yet. So this define helps to guard those
 	  parts of the code.
 
+config MALI_MEMORY_FULLY_BACKED
+	bool "Memory fully physically-backed"
+	default n
+	help
+	  This option enables full backing of all virtual memory allocations
+	  for the kernel. This only affects grow-on-GPU-page-fault memory.
+
+config MALI_DMA_BUF_MAP_ON_DEMAND
+	bool "Map imported dma-bufs on demand"
+	depends on MALI_MIDGARD
+	default n
+	default y if !DMA_BUF_SYNC_IOCTL_SUPPORTED
+	help
+	  This option causes kbase to set up the GPU mapping of an imported
+	  dma-buf only when needed to run atoms. This is the legacy behaviour.
+
+config MALI_DMA_BUF_LEGACY_COMPAT
+	bool "Enable legacy compatibility cache flush on dma-buf map"
+	depends on MALI_MIDGARD && !MALI_DMA_BUF_MAP_ON_DEMAND
+	default y
+	help
+	  This option enables compatibility with the legacy dma-buf mapping
+	  behaviour, where the dma-buf is mapped on import, by adding cache
+	  maintenance, including a cache flush, at the points where
+	  MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping.
+
+config MALI_REAL_HW
+	bool
+	default y
+	default n if NO_MALI
+
+config MALI_HW_ERRATA_1485982_NOT_AFFECTED
+	bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
+	depends on MALI_MIDGARD && MALI_EXPERT
+	default n
+	default y if PLATFORM_JUNO
+	help
+	  This option disables the default workaround for GPU2017-1336. The
+	  workaround keeps the L2 cache powered up except for powerdown and reset.
+
+	  The workaround introduces a limitation that will prevent the running of
+	  protected mode content on fully coherent platforms, as the switch to IO
+	  coherency mode requires the L2 to be turned off.
+
+config MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE
+	bool "Use alternative workaround for BASE_HW_ISSUE_GPU2017_1336"
+	depends on MALI_MIDGARD && MALI_EXPERT && !MALI_HW_ERRATA_1485982_NOT_AFFECTED
+	default n
+	help
+	  This option uses an alternative workaround for GPU2017-1336: the GPU
+	  clock is lowered to a platform-specific, known-good frequency before
+	  powering down the L2 cache. The frequency can be specified in the
+	  device tree using the opp-mali-errata-1485982 property; otherwise the
+	  slowest clock will be selected.
+
 # Instrumentation options.
 
 # config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
index dcd8ca4..2414d51 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/Kbuild
+++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -38,14 +38,13 @@
 	backend/gpu/mali_kbase_pm_ca.c \
 	backend/gpu/mali_kbase_pm_always_on.c \
 	backend/gpu/mali_kbase_pm_coarse_demand.c \
-	backend/gpu/mali_kbase_pm_demand.c \
 	backend/gpu/mali_kbase_pm_policy.c \
-	backend/gpu/mali_kbase_time.c
+	backend/gpu/mali_kbase_time.c \
+	backend/gpu/mali_kbase_l2_mmu_config.c
 
 ifeq ($(MALI_CUSTOMER_RELEASE),0)
 BACKEND += \
-	backend/gpu/mali_kbase_pm_demand_always_powered.c \
-	backend/gpu/mali_kbase_pm_fast_start.c
+	backend/gpu/mali_kbase_pm_always_on_demand.c
 endif
 
 ifeq ($(CONFIG_MALI_DEVFREQ),y)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
index 196a776..4a61f96 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,8 +27,5 @@
 #ifndef _KBASE_BACKEND_CONFIG_H_
 #define _KBASE_BACKEND_CONFIG_H_
 
-/* Enable GPU reset API */
-#define KBASE_GPU_RESET_EN 1
-
 #endif /* _KBASE_BACKEND_CONFIG_H_ */
 
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
index 36147cd..5c17297 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,8 +21,7 @@
  */
 
 #include <mali_kbase.h>
-#include <mali_kbase_tlstream.h>
-#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_tracepoints.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
 #include <linux/of.h>
@@ -54,69 +53,42 @@
  * @kbdev:     Device pointer
  * @freq:      Nominal frequency
  * @core_mask: Pointer to u64 to store core mask to
- *
- * Return: Real target frequency
+ * @freqs:     Pointer to array of frequencies
+ * @volts:     Pointer to array of voltages
  *
  * This function will only perform translation if an operating-points-v2-mali
  * table is present in devicetree. If one is not present then it will return an
  * untranslated frequency and all cores enabled.
  */
-static unsigned long opp_translate(struct kbase_device *kbdev,
-		unsigned long freq, u64 *core_mask)
+static void opp_translate(struct kbase_device *kbdev, unsigned long freq,
+	u64 *core_mask, unsigned long *freqs, unsigned long *volts)
 {
-	int i;
+	unsigned int i;
 
 	for (i = 0; i < kbdev->num_opps; i++) {
-		if (kbdev->opp_table[i].opp_freq == freq) {
-			*core_mask = kbdev->opp_table[i].core_mask;
-			return kbdev->opp_table[i].real_freq;
+		if (kbdev->devfreq_table[i].opp_freq == freq) {
+			unsigned int j;
+
+			*core_mask = kbdev->devfreq_table[i].core_mask;
+			for (j = 0; j < kbdev->nr_clocks; j++) {
+				freqs[j] =
+					kbdev->devfreq_table[i].real_freqs[j];
+				volts[j] =
+					kbdev->devfreq_table[i].opp_volts[j];
+			}
+
+			break;
 		}
 	}
 
-	/* Failed to find OPP - return all cores enabled & nominal frequency */
-	*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
-
-	return freq;
-}
-
-static void voltage_range_check(struct kbase_device *kbdev,
-				unsigned long *voltages)
-{
-	if (kbdev->devfreq_ops.voltage_range_check)
-		kbdev->devfreq_ops.voltage_range_check(kbdev, voltages);
-}
-
-#ifdef CONFIG_REGULATOR
-static int set_voltages(struct kbase_device *kbdev, unsigned long *voltages,
-			bool inc)
-{
-	int i;
-	int err;
-
-	if (kbdev->devfreq_ops.set_voltages)
-		return kbdev->devfreq_ops.set_voltages(kbdev, voltages, inc);
-
-	for (i = 0; i < kbdev->regulator_num; i++) {
-		err = regulator_set_voltage(kbdev->regulator[i],
-					    voltages[i], voltages[i]);
-		if (err) {
-			dev_err(kbdev->dev,
-				"Failed to set reg %d voltage err:(%d)\n",
-				i, err);
-			return err;
-		}
+	/* If failed to find OPP, return all cores enabled
+	 * and nominal frequency
+	 */
+	if (i == kbdev->num_opps) {
+		*core_mask = kbdev->gpu_props.props.raw_props.shader_present;
+		for (i = 0; i < kbdev->nr_clocks; i++)
+			freqs[i] = freq;
 	}
-
-	return 0;
-}
-#endif
-
-static int set_frequency(struct kbase_device *kbdev, unsigned long freq)
-{
-	if (kbdev->devfreq_ops.set_frequency)
-		return kbdev->devfreq_ops.set_frequency(kbdev, freq);
-
-	return clk_set_rate(kbdev->clock, freq);
 }
 
 static int
@@ -125,17 +97,17 @@
 	struct kbase_device *kbdev = dev_get_drvdata(dev);
 	struct dev_pm_opp *opp;
 	unsigned long nominal_freq;
-	unsigned long freq = 0;
-	unsigned long target_volt[KBASE_MAX_REGULATORS];
-	int err, i;
+	unsigned long freqs[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
+	unsigned long volts[BASE_MAX_NR_CLOCKS_REGULATORS] = {0};
+	unsigned int i;
 	u64 core_mask;
 
-	freq = *target_freq;
+	nominal_freq = *target_freq;
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_lock();
 #endif
-	opp = devfreq_recommended_opp(dev, &freq, flags);
+	opp = devfreq_recommended_opp(dev, &nominal_freq, flags);
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 	rcu_read_unlock();
 #endif
@@ -143,50 +115,84 @@
 		dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
 		return PTR_ERR(opp);
 	}
-
-	for (i = 0; i < kbdev->regulator_num; i++)
-		target_volt[i] = dev_pm_opp_get_voltage_supply(opp, i);
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 	dev_pm_opp_put(opp);
 #endif
 
-	nominal_freq = freq;
-
 	/*
 	 * Only update if there is a change of frequency
 	 */
-	if (kbdev->current_nominal_freq == nominal_freq &&
-		kbdev->current_voltage[0] == target_volt[0]) {
+	if (kbdev->current_nominal_freq == nominal_freq) {
 		*target_freq = nominal_freq;
 		return 0;
 	}
 
-	freq = opp_translate(kbdev, nominal_freq, &core_mask);
-	voltage_range_check(kbdev, target_volt);
+	opp_translate(kbdev, nominal_freq, &core_mask, freqs, volts);
 
 #ifdef CONFIG_REGULATOR
-	if (kbdev->current_voltage[0] < target_volt[0]) {
-		err = set_voltages(kbdev, target_volt, true);
-		if (err) {
-			dev_err(kbdev->dev, "Failed to increase voltage\n");
-			return err;
+	/* Regulators and clocks work in pairs: every clock has a regulator,
+	 * and we never expect to have more regulators than clocks.
+	 *
+	 * We always need to increase the voltage before increasing
+	 * the frequency of a regulator/clock pair, otherwise the clock
+	 * wouldn't have enough power to perform the transition.
+	 *
+	 * It's always safer to decrease the frequency before decreasing
+	 * voltage of a regulator/clock pair, otherwise the clock could have
+	 * problems operating if it is deprived of the necessary power
+	 * to sustain its current frequency (even if that happens for a short
+	 * transition interval).
+	 */
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->regulators[i] &&
+				kbdev->current_voltages[i] != volts[i] &&
+				kbdev->current_freqs[i] < freqs[i]) {
+			int err;
+
+			err = regulator_set_voltage(kbdev->regulators[i],
+				volts[i], volts[i]);
+			if (!err) {
+				kbdev->current_voltages[i] = volts[i];
+			} else {
+				dev_err(dev, "Failed to increase voltage (%d) (target %lu)\n",
+					err, volts[i]);
+				return err;
+			}
 		}
 	}
 #endif
 
-	err = set_frequency(kbdev, freq);
-	if (err) {
-		dev_err(dev, "Failed to set clock %lu (target %lu)\n",
-				freq, *target_freq);
-		return err;
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->clocks[i]) {
+			int err;
+
+			err = clk_set_rate(kbdev->clocks[i], freqs[i]);
+			if (!err) {
+				kbdev->current_freqs[i] = freqs[i];
+			} else {
+				dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+					freqs[i], *target_freq);
+				return err;
+			}
+		}
 	}
 
 #ifdef CONFIG_REGULATOR
-	if (kbdev->current_voltage[0] > target_volt[0]) {
-		err = set_voltages(kbdev, target_volt, false);
-		if (err) {
-			dev_err(kbdev->dev, "Failed to decrease voltage\n");
-			return err;
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->regulators[i] &&
+				kbdev->current_voltages[i] != volts[i] &&
+				kbdev->current_freqs[i] > freqs[i]) {
+			int err;
+
+			err = regulator_set_voltage(kbdev->regulators[i],
+				volts[i], volts[i]);
+			if (!err) {
+				kbdev->current_voltages[i] = volts[i];
+			} else {
+				dev_err(dev, "Failed to decrease voltage (%d) (target %lu)\n",
+					err, volts[i]);
+				return err;
+			}
 		}
 	}
 #endif
@@ -194,15 +200,19 @@
 	kbase_devfreq_set_core_mask(kbdev, core_mask);
 
 	*target_freq = nominal_freq;
-	for (i = 0; i < kbdev->regulator_num; i++)
-		kbdev->current_voltage[i] = target_volt[i];
 	kbdev->current_nominal_freq = nominal_freq;
-	kbdev->current_freq = freq;
 	kbdev->current_core_mask = core_mask;
 
-	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET((u64)nominal_freq);
+	KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)nominal_freq);
 
-	return err;
+	return 0;
+}
+
+void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq)
+{
+	unsigned long target_freq = freq;
+
+	kbase_devfreq_target(kbdev->dev, &target_freq, 0);
 }
 
 static int
@@ -277,16 +287,35 @@
 
 	dp->max_state = i;
 
+	/* Have the lowest clock as suspend clock.
+	 * It may be overridden by 'opp-mali-errata-1485982'.
+	 */
+	if (kbdev->pm.backend.gpu_clock_slow_down_wa) {
+		freq = 0;
+		opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+		if (IS_ERR(opp)) {
+			dev_err(kbdev->dev, "failed to find slowest clock");
+			return 0;
+		}
+		dev_info(kbdev->dev, "suspend clock %lu from slowest", freq);
+		kbdev->pm.backend.gpu_clock_suspend_freq = freq;
+	}
+
 	return 0;
 }
 
 static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
 {
-	struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+	struct devfreq_dev_profile *dp = &kbdev->devfreq_profile;
 
 	kfree(dp->freq_table);
 }
 
+static void kbase_devfreq_term_core_mask_table(struct kbase_device *kbdev)
+{
+	kfree(kbdev->devfreq_table);
+}
+
 static void kbase_devfreq_exit(struct device *dev)
 {
 	struct kbase_device *kbdev = dev_get_drvdata(dev);
@@ -294,8 +323,52 @@
 	kbase_devfreq_term_freq_table(kbdev);
 }
 
+static void kbasep_devfreq_read_suspend_clock(struct kbase_device *kbdev,
+		struct device_node *node)
+{
+	u64 freq = 0;
+	int err = 0;
+
+	/* Check if this node is the opp entry having 'opp-mali-errata-1485982'
+	 * to get the suspend clock, otherwise skip it.
+	 */
+	if (!of_property_read_bool(node, "opp-mali-errata-1485982"))
+		return;
+
+	/* In kbase DevFreq, the clock will be read from 'opp-hz'
+	 * and translated into the actual clock by opp_translate.
+	 *
+	 * In customer DVFS, the clock will be read from 'opp-hz-real'
+	 * for the clk driver. If 'opp-hz-real' does not exist,
+	 * read from 'opp-hz'.
+	 */
+	if (IS_ENABLED(CONFIG_MALI_DEVFREQ))
+		err = of_property_read_u64(node, "opp-hz", &freq);
+	else {
+		if (of_property_read_u64(node, "opp-hz-real", &freq))
+			err = of_property_read_u64(node, "opp-hz", &freq);
+	}
+
+	if (WARN_ON(err || !freq))
+		return;
+
+	kbdev->pm.backend.gpu_clock_suspend_freq = freq;
+	dev_info(kbdev->dev,
+		"suspend clock %llu by opp-mali-errata-1485982", freq);
+}
+
 static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
 {
+#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF)
+	/* OPP table initialization requires at least the capability to get
+	 * regulators and clocks from the device tree, as well as parsing
+	 * arrays of unsigned integer values.
+	 *
+	 * The whole initialization process shall simply be skipped if the
+	 * minimum capability is not available.
+	 */
+	return 0;
+#else
 	struct device_node *opp_node = of_parse_phandle(kbdev->dev->of_node,
 			"operating-points-v2", 0);
 	struct device_node *node;
@@ -309,26 +382,61 @@
 		return 0;
 
 	count = dev_pm_opp_get_opp_count(kbdev->dev);
-	kbdev->opp_table = kmalloc_array(count,
+	kbdev->devfreq_table = kmalloc_array(count,
 			sizeof(struct kbase_devfreq_opp), GFP_KERNEL);
-	if (!kbdev->opp_table)
+	if (!kbdev->devfreq_table)
 		return -ENOMEM;
 
 	for_each_available_child_of_node(opp_node, node) {
-		u64 core_mask;
-		u64 opp_freq, real_freq;
 		const void *core_count_p;
+		u64 core_mask, opp_freq,
+			real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
+		int err;
+#ifdef CONFIG_REGULATOR
+		u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
+#endif
 
-		if (of_property_read_u64(node, "opp-hz", &opp_freq)) {
-			dev_warn(kbdev->dev, "OPP is missing required opp-hz property\n");
+		/* Read suspend clock from opp table */
+		if (kbdev->pm.backend.gpu_clock_slow_down_wa)
+			kbasep_devfreq_read_suspend_clock(kbdev, node);
+
+		err = of_property_read_u64(node, "opp-hz", &opp_freq);
+		if (err) {
+			dev_warn(kbdev->dev, "Failed to read opp-hz property with error %d\n",
+					err);
 			continue;
 		}
-		if (of_property_read_u64(node, "opp-hz-real", &real_freq))
-			real_freq = opp_freq;
+
+
+#if BASE_MAX_NR_CLOCKS_REGULATORS > 1
+		err = of_property_read_u64_array(node, "opp-hz-real",
+				real_freqs, kbdev->nr_clocks);
+#else
+		WARN_ON(kbdev->nr_clocks != 1);
+		err = of_property_read_u64(node, "opp-hz-real", real_freqs);
+#endif
+		if (err < 0) {
+			dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n",
+					err);
+			continue;
+		}
+#ifdef CONFIG_REGULATOR
+		err = of_property_read_u32_array(node,
+			"opp-microvolt", opp_volts, kbdev->nr_regulators);
+		if (err < 0) {
+			dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d\n",
+					err);
+			continue;
+		}
+#endif
+
 		if (of_property_read_u64(node, "opp-core-mask", &core_mask))
 			core_mask = shader_present;
-		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) &&
-				core_mask != shader_present) {
+		if (core_mask != shader_present &&
+				(kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11056) ||
+				 corestack_driver_control ||
+				 platform_power_down_only)) {
+
 			dev_warn(kbdev->dev, "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
 					opp_freq);
 			continue;
@@ -360,12 +468,27 @@
 			return -ENODEV;
 		}
 
-		kbdev->opp_table[i].opp_freq = opp_freq;
-		kbdev->opp_table[i].real_freq = real_freq;
-		kbdev->opp_table[i].core_mask = core_mask;
+		kbdev->devfreq_table[i].opp_freq = opp_freq;
+		kbdev->devfreq_table[i].core_mask = core_mask;
+		if (kbdev->nr_clocks > 0) {
+			int j;
 
-		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu real_freq=%llu core_mask=%llx\n",
-				i, opp_freq, real_freq, core_mask);
+			for (j = 0; j < kbdev->nr_clocks; j++)
+				kbdev->devfreq_table[i].real_freqs[j] =
+					real_freqs[j];
+		}
+#ifdef CONFIG_REGULATOR
+		if (kbdev->nr_regulators > 0) {
+			int j;
+
+			for (j = 0; j < kbdev->nr_regulators; j++)
+				kbdev->devfreq_table[i].opp_volts[j] =
+						opp_volts[j];
+		}
+#endif
+
+		dev_info(kbdev->dev, "OPP %d : opp_freq=%llu core_mask=%llx\n",
+				i, opp_freq, core_mask);
 
 		i++;
 	}
@@ -373,30 +496,131 @@
 	kbdev->num_opps = i;
 
 	return 0;
+#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+
+static const char *kbase_devfreq_req_type_name(enum kbase_devfreq_work_type type)
+{
+	const char *p;
+
+	switch (type) {
+	case DEVFREQ_WORK_NONE:
+		p = "devfreq_none";
+		break;
+	case DEVFREQ_WORK_SUSPEND:
+		p = "devfreq_suspend";
+		break;
+	case DEVFREQ_WORK_RESUME:
+		p = "devfreq_resume";
+		break;
+	default:
+		p = "Unknown devfreq_type";
+	}
+	return p;
+}
+
+static void kbase_devfreq_suspend_resume_worker(struct work_struct *work)
+{
+	struct kbase_devfreq_queue_info *info = container_of(work,
+			struct kbase_devfreq_queue_info, work);
+	struct kbase_device *kbdev = container_of(info, struct kbase_device,
+			devfreq_queue);
+	unsigned long flags;
+	enum kbase_devfreq_work_type type, acted_type;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	type = kbdev->devfreq_queue.req_type;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	acted_type = kbdev->devfreq_queue.acted_type;
+	dev_dbg(kbdev->dev, "Worker handles queued req: %s (acted: %s)\n",
+		kbase_devfreq_req_type_name(type),
+		kbase_devfreq_req_type_name(acted_type));
+	switch (type) {
+	case DEVFREQ_WORK_SUSPEND:
+	case DEVFREQ_WORK_RESUME:
+		if (type != acted_type) {
+			if (type == DEVFREQ_WORK_RESUME)
+				devfreq_resume_device(kbdev->devfreq);
+			else
+				devfreq_suspend_device(kbdev->devfreq);
+			dev_dbg(kbdev->dev, "Devfreq transition occurred: %s => %s\n",
+				kbase_devfreq_req_type_name(acted_type),
+				kbase_devfreq_req_type_name(type));
+			kbdev->devfreq_queue.acted_type = type;
+		}
+		break;
+	default:
+		WARN_ON(1);
+	}
+}
+
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0) */
+
+void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
+				       enum kbase_devfreq_work_type work_type)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	unsigned long flags;
+
+	WARN_ON(work_type == DEVFREQ_WORK_NONE);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->devfreq_queue.req_type = work_type;
+	queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	dev_dbg(kbdev->dev, "Enqueuing devfreq req: %s\n",
+		kbase_devfreq_req_type_name(work_type));
+#endif
+}
+
+static int kbase_devfreq_work_init(struct kbase_device *kbdev)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	kbdev->devfreq_queue.req_type = DEVFREQ_WORK_NONE;
+	kbdev->devfreq_queue.acted_type = DEVFREQ_WORK_RESUME;
+
+	kbdev->devfreq_queue.workq = alloc_ordered_workqueue("devfreq_workq", 0);
+	if (!kbdev->devfreq_queue.workq)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->devfreq_queue.work,
+			kbase_devfreq_suspend_resume_worker);
+#endif
+	return 0;
+}
+
+static void kbase_devfreq_work_term(struct kbase_device *kbdev)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	destroy_workqueue(kbdev->devfreq_queue.workq);
+#endif
 }
 
 int kbase_devfreq_init(struct kbase_device *kbdev)
 {
 	struct devfreq_dev_profile *dp;
 	int err;
+	unsigned int i;
 
-	if (!kbdev->clock) {
+	if (kbdev->nr_clocks == 0) {
 		dev_err(kbdev->dev, "Clock not available for devfreq\n");
 		return -ENODEV;
 	}
 
-	/* Can't do devfreq without this table */
-	if (!kbdev->dev_opp_table) {
-		dev_err(kbdev->dev, "Uninitialized devfreq opp table\n");
-		return -ENODEV;
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (kbdev->clocks[i])
+			kbdev->current_freqs[i] =
+				clk_get_rate(kbdev->clocks[i]);
+		else
+			kbdev->current_freqs[i] = 0;
 	}
-
-	kbdev->current_freq = clk_get_rate(kbdev->clock);
-	kbdev->current_nominal_freq = kbdev->current_freq;
+	kbdev->current_nominal_freq = kbdev->current_freqs[0];
 
 	dp = &kbdev->devfreq_profile;
 
-	dp->initial_freq = kbdev->current_freq;
+	dp->initial_freq = kbdev->current_freqs[0];
 	dp->polling_ms = 100;
 	dp->target = kbase_devfreq_target;
 	dp->get_dev_status = kbase_devfreq_status;
@@ -413,14 +637,24 @@
 	};
 
 	err = kbase_devfreq_init_core_mask_table(kbdev);
-	if (err)
+	if (err) {
+		kbase_devfreq_term_freq_table(kbdev);
 		return err;
+	}
+
+	/* Initialise devfreq suspend/resume workqueue */
+	err = kbase_devfreq_work_init(kbdev);
+	if (err) {
+		kbase_devfreq_term_freq_table(kbdev);
+		dev_err(kbdev->dev, "Devfreq initialization failed");
+		return err;
+	}
 
 	kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
 				"simple_ondemand", NULL);
 	if (IS_ERR(kbdev->devfreq)) {
-		kfree(dp->freq_table);
-		return PTR_ERR(kbdev->devfreq);
+		err = PTR_ERR(kbdev->devfreq);
+		goto add_device_failed;
 	}
 
 	/* devfreq_add_device only copies a few of kbdev->dev's fields, so
@@ -465,6 +699,10 @@
 		dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
 	else
 		kbdev->devfreq = NULL;
+add_device_failed:
+	kbase_devfreq_work_term(kbdev);
+
+	kbase_devfreq_term_freq_table(kbdev);
 
 	return err;
 }
@@ -490,5 +728,7 @@
 	else
 		kbdev->devfreq = NULL;
 
-	kfree(kbdev->opp_table);
+	kbase_devfreq_term_core_mask_table(kbdev);
+
+	kbase_devfreq_work_term(kbdev);
 }
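
The reworked kbase_devfreq_target() above follows the DVFS ordering rule spelled out in its new comment: when scaling up, raise the regulator voltage before raising the clock; when scaling down, lower the clock before lowering the voltage. The sketch below illustrates that rule in isolation; it is not part of the patch, and struct opp_pair and opp_pair_set() are hypothetical names.

#include <linux/clk.h>
#include <linux/regulator/consumer.h>

/* Hypothetical clock/regulator pair used only for this illustration. */
struct opp_pair {
	struct clk *clk;
	struct regulator *reg;
	unsigned long cur_volt;
};

static int opp_pair_set(struct opp_pair *p, unsigned long freq,
			unsigned long volt)
{
	int err;

	/* Scaling up: give the clock enough power before speeding it up. */
	if (volt > p->cur_volt) {
		err = regulator_set_voltage(p->reg, volt, volt);
		if (err)
			return err;
		p->cur_volt = volt;
	}

	err = clk_set_rate(p->clk, freq);
	if (err)
		return err;

	/* Scaling down: drop the voltage only after the clock has slowed. */
	if (volt < p->cur_volt) {
		err = regulator_set_voltage(p->reg, volt, volt);
		if (err)
			return err;
		p->cur_volt = volt;
	}

	return 0;
}
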
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
index 0634038..8c976b2 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,23 @@
 #define _BASE_DEVFREQ_H_
 
 int kbase_devfreq_init(struct kbase_device *kbdev);
+
 void kbase_devfreq_term(struct kbase_device *kbdev);
 
+/**
+ * kbase_devfreq_force_freq - Set GPU frequency on L2 power on/off.
+ * @kbdev:      Device pointer
+ * @freq:       GPU frequency in HZ to be set when
+ *              MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE is enabled
+ */
+void kbase_devfreq_force_freq(struct kbase_device *kbdev, unsigned long freq);
+
+/**
+ * kbase_devfreq_enqueue_work - Enqueue a work item for suspend/resume devfreq.
+ * @kbdev:      Device pointer
+ * @work_type:  The type of the devfreq work item, i.e. suspend or resume
+ */
+void kbase_devfreq_enqueue_work(struct kbase_device *kbdev,
+				enum kbase_devfreq_work_type work_type);
+
 #endif /* _BASE_DEVFREQ_H_ */
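
kbase_devfreq_enqueue_work(), declared above, defers devfreq_suspend_device()/devfreq_resume_device() to the ordered workqueue created in kbase_devfreq_work_init(), so a request can be issued from atomic context and all requests are serialised in process context. A minimal usage sketch follows, assuming the declarations above; the two calling functions are hypothetical stand-ins for the driver's real power-management call sites.

#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_devfreq.h>

/* Hypothetical callers; the real call sites live elsewhere in the driver. */
static void example_gpu_poweroff_path(struct kbase_device *kbdev)
{
	/* Only queues work under a spinlock, so this is safe from atomic
	 * context; the worker later calls devfreq_suspend_device().
	 */
	kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND);
}

static void example_gpu_poweron_path(struct kbase_device *kbdev)
{
	kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
}
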
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
index ebc3022..c470a97 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,15 +27,15 @@
 #include <mali_kbase.h>
 #include <backend/gpu/mali_kbase_instr_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
-
 #include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_mmu_hw_direct.h>
+#include <mali_kbase_reset_gpu.h>
 
 #if !defined(CONFIG_MALI_NO_MALI)
 
 
 #ifdef CONFIG_DEBUG_FS
 
-
 int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
 {
 	struct kbase_io_access *old_buf;
@@ -203,28 +203,124 @@
  */
 static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
 {
-	u32 status;
-	u64 address;
-
-	status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS));
-	address = (u64) kbase_reg_read(kbdev,
+	u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	u32 status = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(GPU_FAULTSTATUS));
+	u64 address = (u64) kbase_reg_read(kbdev,
 			GPU_CONTROL_REG(GPU_FAULTADDRESS_HI)) << 32;
+
 	address |= kbase_reg_read(kbdev,
 			GPU_CONTROL_REG(GPU_FAULTADDRESS_LO));
 
-	dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
-			status & 0xFF,
-			kbase_exception_name(kbdev, status),
+	if ((gpu_id & GPU_ID2_PRODUCT_MODEL) != GPU_ID2_PRODUCT_TULX) {
+		dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+			status,
+			kbase_exception_name(kbdev, status & 0xFF),
 			address);
-	if (multiple)
-		dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+		if (multiple)
+			dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+	}
+}
+
+static bool kbase_gpu_fault_interrupt(struct kbase_device *kbdev, int multiple)
+{
+	kbase_report_gpu_fault(kbdev, multiple);
+	return false;
+}
+
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbdev->cache_clean_in_progress) {
+		/* If this is called while another clean is in progress, we
+		 * can't rely on the current one to flush any new changes in
+		 * the cache. Instead, trigger another cache clean immediately
+		 * after this one finishes.
+		 */
+		kbdev->cache_clean_queued = true;
+		return;
+	}
+
+	/* Enable interrupt */
+	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask | CLEAN_CACHES_COMPLETED);
+
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CLEAN_INV_CACHES);
+
+	kbdev->cache_clean_in_progress = true;
+}
+
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_gpu_start_cache_clean_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->cache_clean_queued = false;
+	kbdev->cache_clean_in_progress = false;
+	wake_up(&kbdev->cache_clean_wait);
+}
+
+static void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+	u32 irq_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (kbdev->cache_clean_queued) {
+		kbdev->cache_clean_queued = false;
+
+		KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+				GPU_COMMAND_CLEAN_INV_CACHES);
+	} else {
+		/* Disable interrupt */
+		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+				irq_mask & ~CLEAN_CACHES_COMPLETED);
+
+		kbase_gpu_cache_clean_wait_complete(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	while (kbdev->cache_clean_in_progress) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+		wait_event_interruptible(kbdev->cache_clean_wait,
+				!kbdev->cache_clean_in_progress);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
 {
+	bool clear_gpu_fault = false;
+
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
 	if (val & GPU_FAULT)
-		kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+		clear_gpu_fault = kbase_gpu_fault_interrupt(kbdev,
+					val & MULTIPLE_GPU_FAULTS);
 
 	if (val & RESET_COMPLETED)
 		kbase_pm_reset_done(kbdev);
@@ -232,19 +328,42 @@
 	if (val & PRFCNT_SAMPLE_COMPLETED)
 		kbase_instr_hwcnt_sample_done(kbdev);
 
-	if (val & CLEAN_CACHES_COMPLETED)
-		kbase_clean_caches_done(kbdev);
-
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
 
-	/* kbase_pm_check_transitions must be called after the IRQ has been
-	 * cleared. This is because it might trigger further power transitions
-	 * and we don't want to miss the interrupt raised to notify us that
-	 * these further transitions have finished.
+	/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
+	 * be called after the IRQ has been cleared. This is because it might
+	 * trigger further power transitions and we don't want to miss the
+	 * interrupt raised to notify us that these further transitions have
+	 * finished. The same applies to kbase_clean_caches_done() - if another
+	 * clean was queued, it might trigger another clean, which might
+	 * generate another interrupt which shouldn't be missed.
 	 */
-	if (val & POWER_CHANGED_ALL)
+
+	if (val & CLEAN_CACHES_COMPLETED)
+		kbase_clean_caches_done(kbdev);
+
+	if (val & POWER_CHANGED_ALL) {
 		kbase_pm_power_changed(kbdev);
+	} else if (val & CLEAN_CACHES_COMPLETED) {
+		/* When 'platform_power_down_only' is enabled, the L2 cache is
+		 * not powered down, but flushed before the GPU power down
+		 * (which is done by the platform code). So the L2 state machine
+		 * requests a cache flush. And when that flush completes, the L2
+		 * state machine needs to be re-invoked to proceed with the GPU
+		 * power down.
+		 * If cache line evict messages can be lost when shader cores
+		 * power down then we need to flush the L2 cache before powering
+		 * down cores. When the flush completes, the shaders' state
+		 * machine needs to be re-invoked to proceed with powering down
+		 * cores.
+		 */
+		if (platform_power_down_only ||
+				kbdev->pm.backend.l2_always_on ||
+				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921))
+			kbase_pm_power_changed(kbdev);
+	}
+
 
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
 }
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
index 928efe9..c62f1e5 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014,2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -50,6 +50,41 @@
  */
 u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset);
 
+/**
+ * kbase_gpu_start_cache_clean - Start a cache clean
+ * @kbdev: Kbase device
+ *
+ * Issue a cache clean and invalidate command to hardware. This function will
+ * take hwaccess_lock.
+ */
+void kbase_gpu_start_cache_clean(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_start_cache_clean_nolock - Start a cache clean
+ * @kbdev: Kbase device
+ *
+ * Issue a cache clean and invalidate command to hardware. hwaccess_lock
+ * must be held by the caller.
+ */
+void kbase_gpu_start_cache_clean_nolock(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_wait_cache_clean - Wait for cache cleaning to finish
+ * @kbdev: Kbase device
+ *
+ * This function will take hwaccess_lock, and may sleep.
+ */
+void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_cache_clean_wait_complete - Called after the cache cleaning is
+ *                                       finished. Would also be called after
+ *                                       the GPU reset.
+ * @kbdev: Kbase device
+ *
+ * Caller must hold the hwaccess_lock.
+ */
+void kbase_gpu_cache_clean_wait_complete(struct kbase_device *kbdev);
 
 /**
  * kbase_gpu_interrupt - GPU interrupt handler
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
index 881d50c..9745df63 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,6 +26,7 @@
  */
 #include <mali_kbase.h>
 #include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_reset_gpu.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
 #include <backend/gpu/mali_kbase_js_internal.h>
@@ -56,14 +57,8 @@
 	if (err)
 		goto fail_interrupts;
 
-	err = kbase_hwaccess_pm_init(kbdev);
-	if (err)
-		goto fail_pm;
-
 	return 0;
 
-fail_pm:
-	kbase_release_interrupts(kbdev);
 fail_interrupts:
 	kbase_pm_runtime_term(kbdev);
 fail_runtime_pm:
@@ -74,7 +69,6 @@
 
 void kbase_backend_early_term(struct kbase_device *kbdev)
 {
-	kbase_hwaccess_pm_term(kbdev);
 	kbase_release_interrupts(kbdev);
 	kbase_pm_runtime_term(kbdev);
 	kbasep_platform_device_term(kbdev);
@@ -84,10 +78,18 @@
 {
 	int err;
 
-	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	err = kbase_hwaccess_pm_init(kbdev);
 	if (err)
 		return err;
 
+	err = kbase_reset_gpu_init(kbdev);
+	if (err)
+		goto fail_reset_gpu_init;
+
+	err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+	if (err)
+		goto fail_pm_powerup;
+
 	err = kbase_backend_timer_init(kbdev);
 	if (err)
 		goto fail_timer;
@@ -106,10 +108,29 @@
 	if (err)
 		goto fail_job_slot;
 
+	/* Do the initialisation of devfreq.
+	 * Devfreq needs backend_timer_init() for completion of its
+	 * initialisation and it also needs to catch the first callback
+	 * occurrence of the runtime_suspend event for maintaining state
+	 * coherence with the backend power management, hence needs to be
+	 * placed before the kbase_pm_context_idle().
+	 */
+	err = kbase_backend_devfreq_init(kbdev);
+	if (err)
+		goto fail_devfreq_init;
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	/* Update gpuprops with L2_FEATURES if applicable */
+	kbase_gpuprops_update_l2_features(kbdev);
+
 	init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
 
 	return 0;
 
+fail_devfreq_init:
+	kbase_job_slot_term(kbdev);
 fail_job_slot:
 
 #ifdef CONFIG_MALI_DEBUG
@@ -121,15 +142,21 @@
 	kbase_backend_timer_term(kbdev);
 fail_timer:
 	kbase_hwaccess_pm_halt(kbdev);
+fail_pm_powerup:
+	kbase_reset_gpu_term(kbdev);
+fail_reset_gpu_init:
+	kbase_hwaccess_pm_term(kbdev);
 
 	return err;
 }
 
 void kbase_backend_late_term(struct kbase_device *kbdev)
 {
+	kbase_backend_devfreq_term(kbdev);
 	kbase_job_slot_halt(kbdev);
 	kbase_job_slot_term(kbdev);
 	kbase_backend_timer_term(kbdev);
 	kbase_hwaccess_pm_halt(kbdev);
+	kbase_reset_gpu_term(kbdev);
+	kbase_hwaccess_pm_term(kbdev);
 }
-
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
index 39773e6..29018b2 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -114,3 +114,12 @@
 	}
 }
 
+void kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
+		regdump->l2_features = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_FEATURES));
+	}
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
index 6c69132..1d18326 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,117 +30,48 @@
 #include <mali_midg_regmap.h>
 #include <mali_kbase_hwaccess_instr.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
-#include <backend/gpu/mali_kbase_pm_internal.h>
 #include <backend/gpu/mali_kbase_instr_internal.h>
 
-/**
- * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
- * hardware
- *
- * @kbdev: Kbase device
- */
-static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
-{
-	unsigned long flags;
-	unsigned long pm_flags;
-	u32 irq_mask;
-
-	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
-					KBASE_INSTR_STATE_REQUEST_CLEAN);
-
-	/* Enable interrupt */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask | CLEAN_CACHES_COMPLETED);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-
-	/* clean&invalidate the caches so we're sure the mmu tables for the dump
-	 * buffer is valid */
-	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_CLEAN_INV_CACHES);
-	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
-
-	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-}
-
 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 					struct kbase_context *kctx,
-					struct kbase_ioctl_hwcnt_enable *enable)
+					struct kbase_instr_hwcnt_enable *enable)
 {
-	unsigned long flags, pm_flags;
+	unsigned long flags;
 	int err = -EINVAL;
 	u32 irq_mask;
-	int ret;
 	u32 prfcnt_config;
 
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	/* alignment failure */
 	if ((enable->dump_buffer == 0ULL) || (enable->dump_buffer & (2048 - 1)))
 		goto out_err;
 
-	/* Override core availability policy to ensure all cores are available
-	 */
-	kbase_pm_ca_instr_enable(kbdev);
-
-	/* Request the cores early on synchronously - we'll release them on any
-	 * errors (e.g. instrumentation already active) */
-	kbase_pm_request_cores_sync(kbdev, true, true);
-
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
 	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
 		/* Instrumentation is already enabled */
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		goto out_unrequest_cores;
+		goto out_err;
 	}
 
 	/* Enable interrupt */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
 	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
 						PRFCNT_SAMPLE_COMPLETED);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
 	/* In use, this context is the owner */
 	kbdev->hwcnt.kctx = kctx;
 	/* Remember the dump address so we can reprogram it later */
 	kbdev->hwcnt.addr = enable->dump_buffer;
-
-	/* Request the clean */
-	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
-	kbdev->hwcnt.backend.triggered = 0;
-	/* Clean&invalidate the caches so we're sure the mmu tables for the dump
-	 * buffer is valid */
-	ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
-					&kbdev->hwcnt.backend.cache_clean_work);
-	KBASE_DEBUG_ASSERT(ret);
+	kbdev->hwcnt.addr_bytes = enable->dump_buffer_bytes;
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 
-	/* Wait for cacheclean to complete */
-	wait_event(kbdev->hwcnt.backend.wait,
-					kbdev->hwcnt.backend.triggered != 0);
-
-	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
-							KBASE_INSTR_STATE_IDLE);
-
-	kbase_pm_request_l2_caches(kbdev);
-
 	/* Configure */
 	prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
-	{
-		u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-		u32 product_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID)
-			>> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
-		int arch_v6 = GPU_ID_IS_NEW_FORMAT(product_id);
-
-		if (arch_v6)
-			prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
-	}
-#endif
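+	/* Select the secondary performance counter set when requested. */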
+	if (enable->use_secondary)
+		prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
 			prfcnt_config | PRFCNT_CONFIG_MODE_OFF);
@@ -184,10 +115,6 @@
 
 	dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
 	return err;
- out_unrequest_cores:
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_pm_release_cores(kbdev, true, true);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
  out_err:
 	return err;
 }
@@ -200,17 +127,20 @@
 	struct kbase_device *kbdev = kctx->kbdev;
 
 	while (1) {
+		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
 		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
 
 		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
 			/* Instrumentation is not enabled */
 			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 			goto out;
 		}
 
 		if (kbdev->hwcnt.kctx != kctx) {
 			/* Instrumentation has been setup for another context */
 			spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 			goto out;
 		}
 
@@ -218,6 +148,7 @@
 			break;
 
 		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
 		/* Ongoing dump/setup - wait for its completion */
 		wait_event(kbdev->hwcnt.backend.wait,
@@ -228,7 +159,6 @@
 	kbdev->hwcnt.backend.triggered = 0;
 
 	/* Disable interrupt */
-	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
 	irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
 				irq_mask & ~PRFCNT_SAMPLE_COMPLETED);
@@ -238,15 +168,10 @@
 
 	kbdev->hwcnt.kctx = NULL;
 	kbdev->hwcnt.addr = 0ULL;
+	kbdev->hwcnt.addr_bytes = 0ULL;
 
-	kbase_pm_ca_instr_disable(kbdev);
-
-	kbase_pm_release_cores(kbdev, true, true);
-
-	kbase_pm_release_l2_caches(kbdev);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
 
 	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
 									kctx);
@@ -331,33 +256,34 @@
 void kbasep_cache_clean_worker(struct work_struct *data)
 {
 	struct kbase_device *kbdev;
-	unsigned long flags;
+	unsigned long flags, pm_flags;
 
 	kbdev = container_of(data, struct kbase_device,
 						hwcnt.backend.cache_clean_work);
 
-	mutex_lock(&kbdev->cacheclean_lock);
-	kbasep_instr_hwcnt_cacheclean(kbdev);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
+	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+	/* Clean and invalidate the caches so we're sure the mmu tables for the
+	 * dump buffer are valid.
+	 */
+	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
+	kbase_gpu_start_cache_clean_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
+
+	kbase_gpu_wait_cache_clean(kbdev);
 
 	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	/* Wait for our condition, and any reset to complete */
-	while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-		wait_event(kbdev->hwcnt.backend.cache_clean_wait,
-				kbdev->hwcnt.backend.state !=
-						KBASE_INSTR_STATE_CLEANING);
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	}
 	KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
-						KBASE_INSTR_STATE_CLEANED);
-
+					KBASE_INSTR_STATE_REQUEST_CLEAN);
 	/* All finished and idle */
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
 	kbdev->hwcnt.backend.triggered = 1;
 	wake_up(&kbdev->hwcnt.backend.wait);
 
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-	mutex_unlock(&kbdev->cacheclean_lock);
 }
 
 void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
@@ -389,40 +315,13 @@
 	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
 }
 
-void kbase_clean_caches_done(struct kbase_device *kbdev)
-{
-	u32 irq_mask;
-
-	if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
-		unsigned long flags;
-		unsigned long pm_flags;
-
-		spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-		/* Disable interrupt */
-		spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
-		irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
-		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
-				irq_mask & ~CLEAN_CACHES_COMPLETED);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-
-		/* Wakeup... */
-		if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
-			/* Only wake if we weren't resetting */
-			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
-			wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
-		}
-
-		spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-	}
-}
-
 int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
 {
 	struct kbase_device *kbdev = kctx->kbdev;
 	unsigned long flags;
 	int err;
 
-	/* Wait for dump & cacheclean to complete */
+	/* Wait for dump & cache clean to complete */
 	wait_event(kbdev->hwcnt.backend.wait,
 					kbdev->hwcnt.backend.triggered != 0);
 
@@ -477,7 +376,6 @@
 	kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
 
 	init_waitqueue_head(&kbdev->hwcnt.backend.wait);
-	init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
 	INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
 						kbasep_cache_clean_worker);
 	kbdev->hwcnt.backend.triggered = 0;
@@ -494,4 +392,3 @@
 {
 	destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
 }
-
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
index fb55d2d..c9fb759 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014, 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,11 +39,6 @@
 	KBASE_INSTR_STATE_DUMPING,
 	/* We've requested a clean to occur on a workqueue */
 	KBASE_INSTR_STATE_REQUEST_CLEAN,
-	/* Hardware is currently cleaning and invalidating caches. */
-	KBASE_INSTR_STATE_CLEANING,
-	/* Cache clean completed, and either a) a dump is complete, or
-	 * b) instrumentation can now be setup. */
-	KBASE_INSTR_STATE_CLEANED,
 	/* An error has occured during DUMPING (page fault). */
 	KBASE_INSTR_STATE_FAULT
 };
@@ -54,7 +49,6 @@
 	int triggered;
 
 	enum kbase_instr_state state;
-	wait_queue_head_t cache_clean_wait;
 	struct workqueue_struct *cache_clean_wq;
 	struct work_struct  cache_clean_work;
 };
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
index 608379e..2254b9f 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,12 +36,6 @@
 void kbasep_cache_clean_worker(struct work_struct *data);
 
 /**
- * kbase_clean_caches_done() - Cache clean interrupt received
- * @kbdev: Kbase device
- */
-void kbase_clean_caches_done(struct kbase_device *kbdev);
-
-/**
  * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
  * @kbdev: Kbase device
  */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
index dd0279a..fa3d2cc 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -49,12 +49,11 @@
 	struct kbase_device *kbdev = kbase_untag(data);
 	u32 val;
 
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (!kbdev->pm.backend.gpu_powered) {
 		/* GPU is turned off - IRQ is not for us */
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-									flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return IRQ_NONE;
 	}
 
@@ -65,15 +64,18 @@
 		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
 				__func__, irq, val);
 #endif /* CONFIG_MALI_DEBUG */
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
 
-	if (!val)
+	if (!val) {
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return IRQ_NONE;
+	}
 
 	dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
 
 	kbase_job_done(kbdev, val);
 
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
 	return IRQ_HANDLED;
 }
 
@@ -85,12 +87,11 @@
 	struct kbase_device *kbdev = kbase_untag(data);
 	u32 val;
 
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (!kbdev->pm.backend.gpu_powered) {
 		/* GPU is turned off - IRQ is not for us */
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-									flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return IRQ_NONE;
 	}
 
@@ -103,7 +104,7 @@
 		dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
 				__func__, irq, val);
 #endif /* CONFIG_MALI_DEBUG */
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (!val) {
 		atomic_dec(&kbdev->faults_pending);
@@ -125,12 +126,11 @@
 	struct kbase_device *kbdev = kbase_untag(data);
 	u32 val;
 
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (!kbdev->pm.backend.gpu_powered) {
 		/* GPU is turned off - IRQ is not for us */
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-									flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return IRQ_NONE;
 	}
 
@@ -141,7 +141,7 @@
 		dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
 				__func__, irq, val);
 #endif /* CONFIG_MALI_DEBUG */
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (!val)
 		return IRQ_NONE;
@@ -230,18 +230,17 @@
 	struct kbase_device *kbdev = kbase_untag(data);
 	u32 val;
 
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (!kbdev->pm.backend.gpu_powered) {
 		/* GPU is turned off - IRQ is not for us */
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-									flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return IRQ_NONE;
 	}
 
 	val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
 
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (!val)
 		return IRQ_NONE;
@@ -262,18 +261,17 @@
 	struct kbase_device *kbdev = kbase_untag(data);
 	u32 val;
 
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (!kbdev->pm.backend.gpu_powered) {
 		/* GPU is turned off - IRQ is not for us */
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-									flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return IRQ_NONE;
 	}
 
 	val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
 
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (!val)
 		return IRQ_NONE;
@@ -471,4 +469,6 @@
 	}
 }
 
+KBASE_EXPORT_TEST_API(kbase_synchronize_irqs);
+
 #endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
index fee19aa..d4f96c8 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,14 +27,12 @@
 #include <mali_kbase.h>
 #include <mali_kbase_config.h>
 #include <mali_midg_regmap.h>
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-#include <mali_kbase_gator.h>
-#endif
-#include <mali_kbase_tlstream.h>
-#include <mali_kbase_vinstr.h>
+#include <mali_kbase_tracepoints.h>
 #include <mali_kbase_hw.h>
 #include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_hwcnt_context.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
@@ -42,11 +40,7 @@
 #define beenthere(kctx, f, a...) \
 			dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
 
-#if KBASE_GPU_RESET_EN
-static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
-static void kbasep_reset_timeout_worker(struct work_struct *data);
-static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
-#endif /* KBASE_GPU_RESET_EN */
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev);
 
 static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
 						struct kbase_context *kctx)
@@ -77,7 +71,7 @@
 		struct mali_base_gpu_coherent_group_info *coherency_info =
 			&kbdev->gpu_props.props.coherency_info;
 
-		affinity = kbase_pm_ca_get_core_mask(kbdev) &
+		affinity = kbdev->pm.backend.shaders_avail &
 				kbdev->pm.debug_core_mask[js];
 
 		/* JS2 on a dual core group system targets core group 1. All
@@ -89,7 +83,7 @@
 			affinity &= coherency_info->group[0].core_mask;
 	} else {
 		/* Use all cores */
-		affinity = kbase_pm_ca_get_core_mask(kbdev) &
+		affinity = kbdev->pm.backend.shaders_avail &
 				kbdev->pm.debug_core_mask[js];
 	}
 
@@ -141,6 +135,8 @@
 	if (0 != (katom->core_req & BASE_JD_REQ_SKIP_CACHE_END) &&
 			!(kbdev->serialize_jobs & KBASE_SERIALIZE_RESET))
 		cfg |= JS_CONFIG_END_FLUSH_NO_ACTION;
+	else if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
+		cfg |= JS_CONFIG_END_FLUSH_CLEAN;
 	else
 		cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
 
@@ -185,19 +181,19 @@
 	KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
 							(u32)affinity);
 
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-	kbase_trace_mali_job_slots_event(
-				GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
-				kctx, kbase_jd_atom_id(kctx, katom));
-#endif
-	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(katom, jc_head,
+	KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, kctx,
+		js, kbase_jd_atom_id(kctx, katom), TL_JS_EVENT_START);
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(kbdev, katom, jc_head,
 			affinity, cfg);
 	KBASE_TLSTREAM_TL_RET_CTX_LPU(
+		kbdev,
 		kctx,
 		&kbdev->gpu_props.props.raw_props.js_features[
 			katom->slot_nr]);
-	KBASE_TLSTREAM_TL_RET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
+	KBASE_TLSTREAM_TL_RET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]);
 	KBASE_TLSTREAM_TL_RET_ATOM_LPU(
+			kbdev,
 			katom,
 			&kbdev->gpu_props.props.raw_props.js_features[js],
 			"ctx_nr,atom_nr");
@@ -236,14 +232,12 @@
 						int js,
 						ktime_t end_timestamp)
 {
-	if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
-		struct kbase_jd_atom *katom;
-		ktime_t timestamp_diff;
-		/* The atom in the HEAD */
-		katom = kbase_gpu_inspect(kbdev, js, 0);
+	ktime_t timestamp_diff;
+	struct kbase_jd_atom *katom;
 
-		KBASE_DEBUG_ASSERT(katom != NULL);
-
+	/* Check the atom currently at the HEAD of the job slot, if any */
+	katom = kbase_gpu_inspect(kbdev, js, 0);
+	if (katom != NULL) {
 		timestamp_diff = ktime_sub(end_timestamp,
 				katom->start_timestamp);
 		if (ktime_to_ns(timestamp_diff) >= 0) {
@@ -269,21 +263,23 @@
 					int js)
 {
 	KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(
+		kbdev,
 		&kbdev->gpu_props.props.raw_props.js_features[js]);
 }
 
 void kbase_job_done(struct kbase_device *kbdev, u32 done)
 {
-	unsigned long flags;
 	int i;
 	u32 count = 0;
-	ktime_t end_timestamp = ktime_get();
+	ktime_t end_timestamp;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	KBASE_DEBUG_ASSERT(kbdev);
 
 	KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
 
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	end_timestamp = ktime_get();
 
 	while (done) {
 		u32 failed = done >> 16;
@@ -310,12 +306,9 @@
 					JOB_SLOT_REG(i, JS_STATUS));
 
 				if (completion_code == BASE_JD_EVENT_STOPPED) {
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-					kbase_trace_mali_job_slots_event(
-						GATOR_MAKE_EVENT(
-						GATOR_JOB_SLOT_SOFT_STOPPED, i),
-								NULL, 0);
-#endif
+					KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
+						kbdev, NULL,
+						i, 0, TL_JS_EVENT_SOFT_STOP);
 
 					kbasep_trace_tl_event_lpu_softstop(
 						kbdev, i);
@@ -339,6 +332,16 @@
 				}
 
 				kbase_gpu_irq_evict(kbdev, i, completion_code);
+
+				/* Some jobs that encounter a BUS FAULT may result in corrupted
+				 * state causing future jobs to hang. Reset GPU before
+				 * allowing any other jobs on the slot to continue. */
+				if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3076)) {
+					if (completion_code == BASE_JD_EVENT_JOB_BUS_FAULT) {
+						if (kbase_prepare_to_reset_gpu_locked(kbdev))
+							kbase_reset_gpu_locked(kbdev);
+					}
+				}
 			}
 
 			kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
@@ -464,20 +467,16 @@
 								end_timestamp);
 	}
 
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-#if KBASE_GPU_RESET_EN
 	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
 						KBASE_RESET_GPU_COMMITTED) {
 		/* If we're trying to reset the GPU then we might be able to do
 		 * it early (without waiting for a timeout) because some jobs
 		 * have completed
 		 */
-		kbasep_try_reset_gpu_early(kbdev);
+		kbasep_try_reset_gpu_early_locked(kbdev);
 	}
-#endif /* KBASE_GPU_RESET_EN */
 	KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
 }
-KBASE_EXPORT_TEST_API(kbase_job_done);
 
 static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
 					struct kbase_jd_atom *katom)
@@ -545,7 +544,7 @@
 		target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
 
 		/* Mark the point where we issue the soft-stop command */
-		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(target_katom);
+		KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(kbdev, target_katom);
 
 		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
 			int i;
@@ -716,32 +715,15 @@
 #endif
 }
 
-void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
 {
-	unsigned long flags;
-	struct kbase_device *kbdev;
+	struct kbase_device *kbdev = kctx->kbdev;
 	int i;
 
-	KBASE_DEBUG_ASSERT(kctx != NULL);
-	kbdev = kctx->kbdev;
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	/* Cancel any remaining running jobs for this kctx  */
-	mutex_lock(&kctx->jctx.lock);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	/* Invalidate all jobs in context, to prevent re-submitting */
-	for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
-		if (!work_pending(&kctx->jctx.atoms[i].work))
-			kctx->jctx.atoms[i].event_code =
-						BASE_JD_EVENT_JOB_CANCELLED;
-	}
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
 		kbase_job_slot_hardstop(kctx, i, NULL);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	mutex_unlock(&kctx->jctx.lock);
 }
 
 void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
@@ -774,6 +756,7 @@
 		if (katom->sched_priority > priority) {
 			if (!stop_sent)
 				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
+						kbdev,
 						target_katom);
 
 			kbase_job_slot_softstop(kbdev, js, katom);
@@ -800,7 +783,6 @@
 	if (timeout != 0)
 		goto exit;
 
-#if KBASE_GPU_RESET_EN
 	if (kbase_prepare_to_reset_gpu(kbdev)) {
 		dev_err(kbdev->dev,
 			"Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
@@ -809,15 +791,7 @@
 	}
 
 	/* Wait for the reset to complete */
-	wait_event(kbdev->hwaccess.backend.reset_wait,
-			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
-			== KBASE_RESET_GPU_NOT_PENDING);
-#else
-	dev_warn(kbdev->dev,
-		"Jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
-		ZAP_TIMEOUT);
-
-#endif
+	kbase_reset_gpu_wait(kbdev);
 exit:
 	dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
 
@@ -845,21 +819,7 @@
 
 int kbase_job_slot_init(struct kbase_device *kbdev)
 {
-#if KBASE_GPU_RESET_EN
-	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
-						"Mali reset workqueue", 0, 1);
-	if (NULL == kbdev->hwaccess.backend.reset_workq)
-		return -EINVAL;
-
-	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
-						kbasep_reset_timeout_worker);
-
-	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
-							HRTIMER_MODE_REL);
-	kbdev->hwaccess.backend.reset_timer.function =
-						kbasep_reset_timer_callback;
-#endif
-
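+	/* Reset handling resources are set up in kbase_reset_gpu_init(). */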
+	CSTD_UNUSED(kbdev);
 	return 0;
 }
 KBASE_EXPORT_TEST_API(kbase_job_slot_init);
@@ -871,13 +831,10 @@
 
 void kbase_job_slot_term(struct kbase_device *kbdev)
 {
-#if KBASE_GPU_RESET_EN
-	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
-#endif
+	CSTD_UNUSED(kbdev);
 }
 KBASE_EXPORT_TEST_API(kbase_job_slot_term);
 
-#if KBASE_GPU_RESET_EN
 /**
  * kbasep_check_for_afbc_on_slot() - Check whether AFBC is in use on this slot
  * @kbdev: kbase device pointer
@@ -935,7 +892,6 @@
 
 	return ret;
 }
-#endif /* KBASE_GPU_RESET_EN */
 
 /**
  * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
@@ -992,7 +948,6 @@
 {
 	struct kbase_device *kbdev = kctx->kbdev;
 	bool stopped;
-#if KBASE_GPU_RESET_EN
 	/* We make the check for AFBC before evicting/stopping atoms.  Note
 	 * that no other thread can modify the slots whilst we have the
 	 * hwaccess_lock. */
@@ -1000,12 +955,10 @@
 			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3542)
 			&& kbasep_check_for_afbc_on_slot(kbdev, kctx, js,
 					 target_katom);
-#endif
 
 	stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
 							target_katom,
 							JS_COMMAND_HARD_STOP);
-#if KBASE_GPU_RESET_EN
 	if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
 			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
 			needs_workaround_for_afbc)) {
@@ -1020,7 +973,6 @@
 			kbase_reset_gpu_locked(kbdev);
 		}
 	}
-#endif
 }
 
 /**
@@ -1085,8 +1037,6 @@
 	}
 }
 
-
-#if KBASE_GPU_RESET_EN
 static void kbase_debug_dump_registers(struct kbase_device *kbdev)
 {
 	int i;
@@ -1129,7 +1079,6 @@
 	struct kbase_device *kbdev;
 	ktime_t end_timestamp = ktime_get();
 	struct kbasep_js_device_data *js_devdata;
-	bool try_schedule = false;
 	bool silent = false;
 	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
 
@@ -1147,9 +1096,10 @@
 
 	KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
 
-	/* Suspend vinstr.
-	 * This call will block until vinstr is suspended. */
-	kbase_vinstr_suspend(kbdev->vinstr_ctx);
+	/* Disable GPU hardware counters.
+	 * This call will block until counters are disabled.
+	 */
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
 
 	/* Make sure the timer has completed - this cannot be done from
 	 * interrupt context, so this cannot be done within
@@ -1164,15 +1114,18 @@
 						KBASE_RESET_GPU_NOT_PENDING);
 		kbase_disjoint_state_down(kbdev);
 		wake_up(&kbdev->hwaccess.backend.reset_wait);
-		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return;
 	}
 
 	KBASE_DEBUG_ASSERT(kbdev->irq_reset_flush == false);
 
-	spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-	spin_lock(&kbdev->hwaccess_lock);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	spin_lock(&kbdev->mmu_mask_change);
+	kbase_pm_reset_start_locked(kbdev);
+
 	/* We're about to flush out the IRQs and their bottom half's */
 	kbdev->irq_reset_flush = true;
 
@@ -1181,8 +1134,7 @@
 	kbase_pm_disable_interrupts_nolock(kbdev);
 
 	spin_unlock(&kbdev->mmu_mask_change);
-	spin_unlock(&kbdev->hwaccess_lock);
-	spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	/* Ensure that any IRQ handlers have finished
 	 * Must be done without any locks IRQ handlers will take */
@@ -1225,7 +1177,8 @@
 	/* Complete any jobs that were still on the GPU */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbdev->protected_mode = false;
-	kbase_backend_reset(kbdev, &end_timestamp);
+	if (!kbdev->pm.backend.protected_entry_transition_override)
+		kbase_backend_reset(kbdev, &end_timestamp);
 	kbase_pm_metrics_update(kbdev, NULL);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
@@ -1244,37 +1197,33 @@
 
 	kbase_pm_enable_interrupts(kbdev);
 
-	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
-						KBASE_RESET_GPU_NOT_PENDING);
-
 	kbase_disjoint_state_down(kbdev);
 
-	wake_up(&kbdev->hwaccess.backend.reset_wait);
-	if (!silent)
-		dev_err(kbdev->dev, "Reset complete");
-
-	if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
-		try_schedule = true;
-
 	mutex_unlock(&js_devdata->runpool_mutex);
 
 	mutex_lock(&kbdev->pm.lock);
 
+	kbase_pm_reset_complete(kbdev);
+
 	/* Find out what cores are required now */
 	kbase_pm_update_cores_state(kbdev);
 
 	/* Synchronously request and wait for those cores, because if
 	 * instrumentation is enabled it would need them immediately. */
-	kbase_pm_check_transitions_sync(kbdev);
+	kbase_pm_wait_for_desired_state(kbdev);
 
 	mutex_unlock(&kbdev->pm.lock);
 
+	atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+						KBASE_RESET_GPU_NOT_PENDING);
+
+	wake_up(&kbdev->hwaccess.backend.reset_wait);
+	if (!silent)
+		dev_err(kbdev->dev, "Reset complete");
+
 	/* Try submitting some jobs to restart processing */
-	if (try_schedule) {
-		KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
-									0);
-		kbase_js_sched_all(kbdev);
-	}
+	KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, 0);
+	kbase_js_sched_all(kbdev);
 
 	/* Process any pending slot updates */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -1283,8 +1232,10 @@
 
 	kbase_pm_context_idle(kbdev);
 
-	/* Release vinstr */
-	kbase_vinstr_resume(kbdev->vinstr_ctx);
+	/* Re-enable GPU hardware counters */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
 }
@@ -1330,7 +1281,7 @@
 	/* To prevent getting incorrect registers when dumping failed job,
 	 * skip early reset.
 	 */
-	if (kbdev->job_fault_debug != false)
+	if (atomic_read(&kbdev->job_fault_debug) > 0)
 		return;
 
 	/* Check that the reset has been committed to (i.e. kbase_reset_gpu has
@@ -1458,23 +1409,25 @@
 	kbasep_try_reset_gpu_early_locked(kbdev);
 }
 
-void kbase_reset_gpu_silent(struct kbase_device *kbdev)
+int kbase_reset_gpu_silent(struct kbase_device *kbdev)
 {
 	if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
 						KBASE_RESET_GPU_NOT_PENDING,
 						KBASE_RESET_GPU_SILENT) !=
 						KBASE_RESET_GPU_NOT_PENDING) {
 		/* Some other thread is already resetting the GPU */
-		return;
+		return -EAGAIN;
 	}
 
 	kbase_disjoint_state_up(kbdev);
 
 	queue_work(kbdev->hwaccess.backend.reset_workq,
 			&kbdev->hwaccess.backend.reset_work);
+
+	return 0;
 }
 
-bool kbase_reset_gpu_active(struct kbase_device *kbdev)
+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
 {
 	if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
 			KBASE_RESET_GPU_NOT_PENDING)
@@ -1482,4 +1435,37 @@
 
 	return true;
 }
-#endif /* KBASE_GPU_RESET_EN */
+
+int kbase_reset_gpu_wait(struct kbase_device *kbdev)
+{
+	wait_event(kbdev->hwaccess.backend.reset_wait,
+			atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+			== KBASE_RESET_GPU_NOT_PENDING);
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait);
+
+int kbase_reset_gpu_init(struct kbase_device *kbdev)
+{
+	kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+						"Mali reset workqueue", 0, 1);
+	if (kbdev->hwaccess.backend.reset_workq == NULL)
+		return -ENOMEM;
+
+	INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+						kbasep_reset_timeout_worker);
+
+	hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+							HRTIMER_MODE_REL);
+	kbdev->hwaccess.backend.reset_timer.function =
+						kbasep_reset_timer_callback;
+
+	return 0;
+}
+
+void kbase_reset_gpu_term(struct kbase_device *kbdev)
+{
+	destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
index 831491e..452ddee 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -159,31 +159,11 @@
 void kbase_job_slot_term(struct kbase_device *kbdev);
 
 /**
- * kbase_gpu_cacheclean - Cause a GPU cache clean & flush
+ * kbase_gpu_cache_clean - Cause a GPU cache clean & flush
  * @kbdev: Device pointer
  *
  * Caller must not be in IRQ context
  */
-void kbase_gpu_cacheclean(struct kbase_device *kbdev);
-
-static inline bool kbase_atom_needs_tiler(struct kbase_device *kbdev,
-		base_jd_core_req core_req)
-{
-	return core_req & BASE_JD_REQ_T;
-}
-
-static inline bool kbase_atom_needs_shaders(struct kbase_device *kbdev,
-		base_jd_core_req core_req)
-{
-	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
-		return true;
-	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
-			BASE_JD_REQ_T) {
-		/* Tiler only atom */
-		return false;
-	}
-
-	return true;
-}
+void kbase_gpu_cache_clean(struct kbase_device *kbdev);
 
 #endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
index 79777b7..55440b8 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,8 +29,10 @@
 #include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_jm.h>
 #include <mali_kbase_js.h>
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_hwcnt_context.h>
 #include <mali_kbase_10969_workaround.h>
+#include <mali_kbase_reset_gpu.h>
 #include <backend/gpu/mali_kbase_cache_policy_backend.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
@@ -296,143 +298,14 @@
 }
 
 
-static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
-						struct kbase_jd_atom *katom);
-
-static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
-						int js,
-						struct kbase_jd_atom *katom)
-{
-	base_jd_core_req core_req = katom->core_req;
-
-	/* NOTE: The following uses a number of FALLTHROUGHs to optimize the
-	 * calls to this function. Ending of the function is indicated by BREAK
-	 * OUT.
-	 */
-	switch (katom->coreref_state) {
-		/* State when job is first attempted to be run */
-	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
-		/* Request the cores */
-		kbase_pm_request_cores(kbdev,
-				kbase_atom_needs_tiler(kbdev, core_req),
-				kbase_atom_needs_shaders(kbdev, core_req));
-
-		/* Proceed to next state */
-		katom->coreref_state =
-		KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
-
-		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
-
-	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
-		{
-			bool cores_ready;
-
-			cores_ready = kbase_pm_cores_requested(kbdev,
-				kbase_atom_needs_tiler(kbdev, core_req),
-				kbase_atom_needs_shaders(kbdev,	core_req));
-
-			if (!cores_ready) {
-				/* Stay in this state and return, to retry at
-				 * this state later.
-				 */
-				KBASE_TRACE_ADD_SLOT_INFO(kbdev,
-				JS_CORE_REF_REGISTER_INUSE_FAILED,
-						katom->kctx, katom,
-						katom->jc, js,
-						(u32) 0);
-				/* *** BREAK OUT: No state transition *** */
-				break;
-			}
-			/* Proceed to next state */
-			katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
-			/* *** BREAK OUT: Cores Ready *** */
-			break;
-		}
-
-	default:
-		KBASE_DEBUG_ASSERT_MSG(false,
-				"Unhandled kbase_atom_coreref_state %d",
-				katom->coreref_state);
-		break;
-	}
-
-	return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
-}
-
-static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
-						struct kbase_jd_atom *katom)
-{
-	base_jd_core_req core_req = katom->core_req;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	KBASE_DEBUG_ASSERT(katom != NULL);
-
-	switch (katom->coreref_state) {
-	case KBASE_ATOM_COREREF_STATE_READY:
-		/* State where atom was submitted to the HW - just proceed to
-		 * power-down */
-
-		/* *** FALLTHROUGH *** */
-
-	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
-		/* State where cores were requested */
-		kbase_pm_release_cores(kbdev,
-				kbase_atom_needs_tiler(kbdev, core_req),
-				kbase_atom_needs_shaders(kbdev, core_req));
-		break;
-
-	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
-		/* Initial state - nothing required */
-		break;
-
-	default:
-		KBASE_DEBUG_ASSERT_MSG(false,
-						"Unhandled coreref_state: %d",
-							katom->coreref_state);
-		break;
-	}
-
-	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
-}
-
-static void kbasep_js_job_check_deref_cores_nokatom(struct kbase_device *kbdev,
-		base_jd_core_req core_req,
-		enum kbase_atom_coreref_state coreref_state)
-{
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	switch (coreref_state) {
-	case KBASE_ATOM_COREREF_STATE_READY:
-		/* State where atom was submitted to the HW - just proceed to
-		 * power-down */
-
-		/* *** FALLTHROUGH *** */
-
-	case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
-		/* State where cores were requested */
-		kbase_pm_release_cores(kbdev,
-				kbase_atom_needs_tiler(kbdev, core_req),
-				kbase_atom_needs_shaders(kbdev, core_req));
-		break;
-
-	case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
-		/* Initial state - nothing required */
-		break;
-
-	default:
-		KBASE_DEBUG_ASSERT_MSG(false,
-						"Unhandled coreref_state: %d",
-							coreref_state);
-		break;
-	}
-}
-
 static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 					struct kbase_jd_atom *katom,
 					ktime_t *end_timestamp)
 {
 	struct kbase_context *kctx = katom->kctx;
 
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	switch (katom->gpu_rb_state) {
 	case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
 		/* Should be impossible */
@@ -450,11 +323,11 @@
 			kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
 		/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
 
-		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(katom,
+		KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, katom,
 			&kbdev->gpu_props.props.raw_props.js_features
 				[katom->slot_nr]);
-		KBASE_TLSTREAM_TL_NRET_ATOM_AS(katom, &kbdev->as[kctx->as_nr]);
-		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kctx,
+		KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, katom, &kbdev->as[kctx->as_nr]);
+		KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, kctx,
 			&kbdev->gpu_props.props.raw_props.js_features
 				[katom->slot_nr]);
 
@@ -468,26 +341,55 @@
 		break;
 
 	case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_TRANSITION:
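+		/* The atom is being released part way through a protected mode
+		 * transition, so drop any PM overrides it may still be holding.
+		 */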
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_CHECK) &&
+				(katom->protected_state.enter !=
+				KBASE_ATOM_ENTER_PROTECTED_HWCNT)) {
+			kbase_pm_protected_override_disable(kbdev);
+			kbase_pm_update_cores_state_nolock(kbdev);
+		}
+		if (kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.enter ==
+				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2))
+			kbase_pm_protected_entry_override_disable(kbdev);
+		if (!kbase_jd_katom_is_protected(katom) &&
+				(katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_CHECK) &&
+				(katom->protected_state.exit !=
+				KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT)) {
+			kbase_pm_protected_override_disable(kbdev);
+			kbase_pm_update_cores_state_nolock(kbdev);
+		}
+
 		if (katom->protected_state.enter !=
 				KBASE_ATOM_ENTER_PROTECTED_CHECK ||
 				katom->protected_state.exit !=
 				KBASE_ATOM_EXIT_PROTECTED_CHECK)
 			kbdev->protected_mode_transition = false;
-
+		/* If the atom has suspended hwcnt but has not yet entered
+		 * protected mode, then resume hwcnt now. If the GPU is now in
+		 * protected mode then hwcnt will be resumed by GPU reset so
+		 * don't resume it here.
+		 */
 		if (kbase_jd_katom_is_protected(katom) &&
 				((katom->protected_state.enter ==
 				KBASE_ATOM_ENTER_PROTECTED_IDLE_L2) ||
 				 (katom->protected_state.enter ==
-				KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY) ||
-				 (katom->protected_state.enter ==
-				KBASE_ATOM_ENTER_PROTECTED_FINISHED))) {
-			kbase_vinstr_resume(kbdev->vinstr_ctx);
+				KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY))) {
+			WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+			kbdev->protected_mode_hwcnt_desired = true;
+			if (kbdev->protected_mode_hwcnt_disabled) {
+				kbase_hwcnt_context_enable(
+					kbdev->hwcnt_gpu_ctx);
+				kbdev->protected_mode_hwcnt_disabled = false;
+			}
 		}
 
 		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
 			if (katom->atom_flags &
 					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
-				kbdev->l2_users_count--;
+				kbase_pm_protected_l2_override(kbdev, false);
 				katom->atom_flags &=
 					~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
 			}
@@ -512,6 +414,8 @@
 static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
 						struct kbase_jd_atom *katom)
 {
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	kbase_gpu_release_atom(kbdev, katom, NULL);
 	katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
 }
@@ -630,9 +534,7 @@
 
 	/* The protected mode disable callback will be called as part of reset
 	 */
-	kbase_reset_gpu_silent(kbdev);
-
-	return 0;
+	return kbase_reset_gpu_silent(kbdev);
 }
 
 static int kbase_jm_protected_entry(struct kbase_device *kbdev,
@@ -640,6 +542,8 @@
 {
 	int err = 0;
 
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	err = kbase_gpu_protected_mode_enter(kbdev);
 
 	/*
@@ -648,14 +552,23 @@
 	 */
 
 	kbdev->protected_mode_transition = false;
+	kbase_pm_protected_override_disable(kbdev);
+	kbase_pm_update_cores_state_nolock(kbdev);
 
-	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev);
+	KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
 	if (err) {
 		/*
 		 * Failed to switch into protected mode, resume
-		 * vinstr core and fail atom.
+		 * GPU hwcnt and fail atom.
 		 */
-		kbase_vinstr_resume(kbdev->vinstr_ctx);
+		WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+		kbdev->protected_mode_hwcnt_desired = true;
+		if (kbdev->protected_mode_hwcnt_disabled) {
+			kbase_hwcnt_context_enable(
+				kbdev->hwcnt_gpu_ctx);
+			kbdev->protected_mode_hwcnt_disabled = false;
+		}
+
 		katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
 		kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
 		/*
@@ -692,35 +605,56 @@
 {
 	int err = 0;
 
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	switch (katom[idx]->protected_state.enter) {
 	case KBASE_ATOM_ENTER_PROTECTED_CHECK:
-		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev);
+		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
 		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
 		 * should ensure that we are not already transitiong, and that
 		 * there are no atoms currently on the GPU. */
 		WARN_ON(kbdev->protected_mode_transition);
 		WARN_ON(kbase_gpu_atoms_submitted_any(kbdev));
+		/* If hwcnt is disabled, it means we didn't clean up correctly
+		 * during last exit from protected mode.
+		 */
+		WARN_ON(kbdev->protected_mode_hwcnt_disabled);
+
+		katom[idx]->protected_state.enter =
+			KBASE_ATOM_ENTER_PROTECTED_HWCNT;
 
 		kbdev->protected_mode_transition = true;
-		katom[idx]->protected_state.enter =
-			KBASE_ATOM_ENTER_PROTECTED_VINSTR;
 
 		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
-	case KBASE_ATOM_ENTER_PROTECTED_VINSTR:
-		if (kbase_vinstr_try_suspend(kbdev->vinstr_ctx) < 0) {
-			/*
-			 * We can't switch now because
-			 * the vinstr core state switch
-			 * is not done yet.
-			 */
+	case KBASE_ATOM_ENTER_PROTECTED_HWCNT:
+		/* See if we can get away with disabling hwcnt atomically */
+		kbdev->protected_mode_hwcnt_desired = false;
+		if (!kbdev->protected_mode_hwcnt_disabled) {
+			if (kbase_hwcnt_context_disable_atomic(
+				kbdev->hwcnt_gpu_ctx))
+				kbdev->protected_mode_hwcnt_disabled = true;
+		}
+
+		/* We couldn't disable atomically, so kick off a worker */
+		if (!kbdev->protected_mode_hwcnt_disabled) {
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+			queue_work(system_wq,
+				&kbdev->protected_mode_hwcnt_disable_work);
+#else
+			queue_work(system_highpri_wq,
+				&kbdev->protected_mode_hwcnt_disable_work);
+#endif
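+			/* Retry this state once the worker has disabled the counters. */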
 			return -EAGAIN;
 		}
 
 		/* Once reaching this point GPU must be
-		 * switched to protected mode or vinstr
+		 * switched to protected mode or hwcnt
 		 * re-enabled. */
 
+		if (kbase_pm_protected_entry_override_enable(kbdev))
+			return -EAGAIN;
+
 		/*
 		 * Not in correct mode, begin protected mode switch.
 		 * Entering protected mode requires us to power down the L2,
@@ -729,13 +663,29 @@
 		katom[idx]->protected_state.enter =
 			KBASE_ATOM_ENTER_PROTECTED_IDLE_L2;
 
-		kbase_pm_update_cores_state_nolock(kbdev);
+		kbase_pm_protected_override_enable(kbdev);
+		/*
+		 * If a GPU reset hasn't already been initiated, the state
+		 * machine must be invoked explicitly to power down the
+		 * shader cores and L2.
+		 */
+		if (!kbdev->pm.backend.protected_entry_transition_override)
+			kbase_pm_update_cores_state_nolock(kbdev);
 
 		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 
 	case KBASE_ATOM_ENTER_PROTECTED_IDLE_L2:
 		/* Avoid unnecessary waiting on non-ACE platforms. */
-		if (kbdev->current_gpu_coherency_mode == COHERENCY_ACE) {
+		if (kbdev->system_coherency == COHERENCY_ACE) {
+			if (kbdev->pm.backend.l2_always_on) {
+				/*
+				 * If the GPU reset hasn't completed, then L2
+				 * could still be powered up.
+				 */
+				if (kbase_reset_gpu_is_active(kbdev))
+					return -EAGAIN;
+			}
+
 			if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
 				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
 				/*
@@ -759,12 +709,15 @@
 		 */
 		kbase_gpu_disable_coherent(kbdev);
 
+		kbase_pm_protected_entry_override_disable(kbdev);
+
 		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TGOX_R1_1234)) {
 			/*
 			 * Power on L2 caches; this will also result in the
 			 * correct value written to coherency enable register.
 			 */
-			kbase_pm_request_l2_caches_nolock(kbdev);
+			kbase_pm_protected_l2_override(kbdev, true);
+
 			/*
 			 * Set the flag on the atom that additional
 			 * L2 references are taken.
@@ -787,14 +740,15 @@
 			 * Check that L2 caches are powered and, if so,
 			 * enter protected mode.
 			 */
-			if (kbdev->pm.backend.l2_powered != 0) {
+			if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
 				/*
 				 * Remove additional L2 reference and reset
 				 * the atom flag which denotes it.
 				 */
 				if (katom[idx]->atom_flags &
 					KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT) {
-					kbdev->l2_users_count--;
+					kbase_pm_protected_l2_override(kbdev,
+							false);
 					katom[idx]->atom_flags &=
 						~KBASE_KATOM_FLAG_HOLDING_L2_REF_PROT;
 				}
@@ -825,10 +779,11 @@
 {
 	int err = 0;
 
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	switch (katom[idx]->protected_state.exit) {
 	case KBASE_ATOM_EXIT_PROTECTED_CHECK:
-		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev);
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, kbdev);
 		/* The checks in KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV
 		 * should ensure that we are not already transitiong, and that
 		 * there are no atoms currently on the GPU. */
@@ -844,12 +799,12 @@
 				KBASE_ATOM_EXIT_PROTECTED_IDLE_L2;
 
 		kbdev->protected_mode_transition = true;
+		kbase_pm_protected_override_enable(kbdev);
 		kbase_pm_update_cores_state_nolock(kbdev);
 
 		/* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
 	case KBASE_ATOM_EXIT_PROTECTED_IDLE_L2:
-		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2) ||
-				kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_L2)) {
+		if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) {
 			/*
 			 * The L2 is still powered, wait for all the users to
 			 * finish with it before doing the actual reset.
@@ -865,8 +820,12 @@
 		/* Issue the reset to the GPU */
 		err = kbase_gpu_protected_mode_reset(kbdev);
 
+		if (err == -EAGAIN)
+			return -EAGAIN;
+
 		if (err) {
 			kbdev->protected_mode_transition = false;
+			kbase_pm_protected_override_disable(kbdev);
 
 			/* Failed to exit protected mode, fail atom */
 			katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
@@ -880,7 +839,16 @@
 				kbase_jm_return_atom_to_js(kbdev, katom[idx]);
 			}
 
-			kbase_vinstr_resume(kbdev->vinstr_ctx);
+			/* If we're exiting from protected mode, hwcnt must have
+			 * been disabled during entry.
+			 */
+			WARN_ON(!kbdev->protected_mode_hwcnt_disabled);
+			kbdev->protected_mode_hwcnt_desired = true;
+			if (kbdev->protected_mode_hwcnt_disabled) {
+				kbase_hwcnt_context_enable(
+					kbdev->hwcnt_gpu_ctx);
+				kbdev->protected_mode_hwcnt_disabled = false;
+			}
 
 			return -EINVAL;
 		}
@@ -909,6 +877,9 @@
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
+	if (kbase_reset_gpu_is_active(kbdev))
+		return;
+
 	for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
 		struct kbase_jd_atom *katom[2];
 		int idx;
@@ -1014,9 +985,8 @@
 					break;
 				}
 
-				cores_ready =
-					kbasep_js_job_check_ref_cores(kbdev, js,
-								katom[idx]);
+				cores_ready = kbase_pm_cores_requested(kbdev,
+						true);
 
 				if (katom[idx]->event_code ==
 						BASE_JD_EVENT_PM_EVENT) {
@@ -1070,7 +1040,7 @@
 
 				if ((kbdev->serialize_jobs &
 						KBASE_SERIALIZE_RESET) &&
-						kbase_reset_gpu_active(kbdev))
+						kbase_reset_gpu_is_active(kbdev))
 					break;
 
 				/* Check if this job needs the cycle counter
@@ -1153,12 +1123,12 @@
 		next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
 
 		if (completion_code == BASE_JD_EVENT_STOPPED) {
-			KBASE_TLSTREAM_TL_NRET_ATOM_LPU(next_katom,
+			KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom,
 				&kbdev->gpu_props.props.raw_props.js_features
 					[next_katom->slot_nr]);
-			KBASE_TLSTREAM_TL_NRET_ATOM_AS(next_katom, &kbdev->as
+			KBASE_TLSTREAM_TL_NRET_ATOM_AS(kbdev, next_katom, &kbdev->as
 					[next_katom->kctx->as_nr]);
-			KBASE_TLSTREAM_TL_NRET_CTX_LPU(next_katom->kctx,
+			KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx,
 				&kbdev->gpu_props.props.raw_props.js_features
 					[next_katom->slot_nr]);
 		}
@@ -1204,19 +1174,11 @@
 		 * corruption we need to flush the cache manually before any
 		 * affected memory gets reused. */
 		katom->need_cache_flush_cores_retained = true;
-		kbase_pm_request_cores(kbdev,
-				kbase_atom_needs_tiler(kbdev, katom->core_req),
-				kbase_atom_needs_shaders(kbdev,
-						katom->core_req));
 	} else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
 		if (kbdev->gpu_props.num_core_groups > 1 &&
 				katom->device_nr >= 1) {
 			dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
 			katom->need_cache_flush_cores_retained = true;
-			kbase_pm_request_cores(kbdev,
-				kbase_atom_needs_tiler(kbdev, katom->core_req),
-				kbase_atom_needs_shaders(kbdev,
-						katom->core_req));
 		}
 	}
 
@@ -1408,10 +1370,6 @@
 				break;
 			if (katom->protected_state.exit ==
 			    KBASE_ATOM_EXIT_PROTECTED_RESET_WAIT) {
-				KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev);
-
-				kbase_vinstr_resume(kbdev->vinstr_ctx);
-
 				/* protected mode sanity checks */
 				KBASE_DEBUG_ASSERT_MSG(
 					kbase_jd_katom_is_protected(katom) == kbase_gpu_in_protected_mode(kbdev),
@@ -1422,7 +1380,8 @@
 					!kbase_jd_katom_is_protected(katom),
 					"Protected atom on JS%d not supported", js);
 			}
-			if (katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED)
+			if ((katom->gpu_rb_state < KBASE_ATOM_GPU_RB_SUBMITTED) &&
+			    !kbase_ctx_flag(katom->kctx, KCTX_DYING))
 				keep_in_jm_rb = true;
 
 			kbase_gpu_release_atom(kbdev, katom, NULL);
@@ -1433,8 +1392,6 @@
 			 * it will be processed again from the starting state.
 			 */
 			if (keep_in_jm_rb) {
-				kbasep_js_job_check_deref_cores(kbdev, katom);
-				katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
 				katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
 				/* As the atom was not removed, increment the
 				 * index so that we read the correct atom in the
@@ -1453,7 +1410,19 @@
 		}
 	}
 
+	/* Re-enable GPU hardware counters if we're resetting from protected
+	 * mode.
+	 */
+	kbdev->protected_mode_hwcnt_desired = true;
+	if (kbdev->protected_mode_hwcnt_disabled) {
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		kbdev->protected_mode_hwcnt_disabled = false;
+
+		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, kbdev);
+	}
+
 	kbdev->protected_mode_transition = false;
+	kbase_pm_protected_override_disable(kbdev);
 }
 
 static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
@@ -1474,6 +1443,8 @@
 						u32 action,
 						bool disjoint)
 {
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
 	katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
 	kbase_gpu_mark_atom_for_return(kbdev, katom);
 	katom->kctx->blocked_js[katom->slot_nr][katom->sched_priority] = true;
@@ -1697,52 +1668,13 @@
 	return ret;
 }
 
-void kbase_gpu_cacheclean(struct kbase_device *kbdev)
-{
-	/* Limit the number of loops to avoid a hang if the interrupt is missed
-	 */
-	u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
-
-	mutex_lock(&kbdev->cacheclean_lock);
-
-	/* use GPU_COMMAND completion solution */
-	/* clean & invalidate the caches */
-	KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
-					GPU_COMMAND_CLEAN_INV_CACHES);
-
-	/* wait for cache flush to complete before continuing */
-	while (--max_loops &&
-		(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) &
-						CLEAN_CACHES_COMPLETED) == 0)
-		;
-
-	/* clear the CLEAN_CACHES_COMPLETED irq */
-	KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
-							CLEAN_CACHES_COMPLETED);
-	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
-						CLEAN_CACHES_COMPLETED);
-	KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
-						KBASE_INSTR_STATE_CLEANING,
-	    "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
-
-	mutex_unlock(&kbdev->cacheclean_lock);
-}
-
-void kbase_backend_cacheclean(struct kbase_device *kbdev,
+void kbase_backend_cache_clean(struct kbase_device *kbdev,
 		struct kbase_jd_atom *katom)
 {
 	if (katom->need_cache_flush_cores_retained) {
-		unsigned long flags;
+		kbase_gpu_start_cache_clean(kbdev);
+		kbase_gpu_wait_cache_clean(kbdev);
 
-		kbase_gpu_cacheclean(kbdev);
-
-		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbase_pm_release_cores(kbdev,
-				kbase_atom_needs_tiler(kbdev, katom->core_req),
-				kbase_atom_needs_shaders(kbdev,
-						katom->core_req));
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		katom->need_cache_flush_cores_retained = false;
 	}
 }
@@ -1754,7 +1686,7 @@
 	 * If cache flush required due to HW workaround then perform the flush
 	 * now
 	 */
-	kbase_backend_cacheclean(kbdev, katom);
+	kbase_backend_cache_clean(kbdev, katom);
 
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969)            &&
 	    (katom->core_req & BASE_JD_REQ_FS)                        &&
@@ -1773,24 +1705,11 @@
 			katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
 		}
 	}
-
-	/* Clear the coreref_state now - while check_deref_cores() may not have
-	 * been called yet, the caller will have taken a copy of this field. If
-	 * this is not done, then if the atom is re-scheduled (following a soft
-	 * stop) then the core reference would not be retaken. */
-	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
 }
 
 void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
-		base_jd_core_req core_req,
-		enum kbase_atom_coreref_state coreref_state)
+		base_jd_core_req core_req)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbasep_js_job_check_deref_cores_nokatom(kbdev, core_req, coreref_state);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
 	if (!kbdev->pm.active_count) {
 		mutex_lock(&kbdev->js_data.runpool_mutex);
 		mutex_lock(&kbdev->pm.lock);
@@ -1829,6 +1748,3 @@
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
-
-
-
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
index df2dd5ec..1ffaa23 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,7 @@
 
 #include <mali_kbase.h>
 #include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_reset_gpu.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
 #include <backend/gpu/mali_kbase_js_internal.h>
 
@@ -116,7 +117,7 @@
 			if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
 				u32 ticks = atom->ticks++;
 
-#ifndef CONFIG_MALI_JOB_DUMP
+#if !defined(CONFIG_MALI_JOB_DUMP) && !defined(CONFIG_MALI_VECTOR_DUMP)
 				u32 soft_stop_ticks, hard_stop_ticks,
 								gpu_reset_ticks;
 				if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
@@ -250,14 +251,12 @@
 			}
 		}
 	}
-#if KBASE_GPU_RESET_EN
 	if (reset_needed) {
 		dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
 
 		if (kbase_prepare_to_reset_gpu_locked(kbdev))
 			kbase_reset_gpu_locked(kbdev);
 	}
-#endif /* KBASE_GPU_RESET_EN */
 	/* the timer is re-issued if there is contexts in the run-pool */
 
 	if (backend->timer_running)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c
new file mode 100644
index 0000000..916916d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_bits.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include "mali_kbase_l2_mmu_config.h"
+
+/**
+ * struct l2_mmu_config_limit_region
+ *
+ * @value:    The default value to load into the L2_MMU_CONFIG register
+ * @mask:     The shifted mask of the field in the L2_MMU_CONFIG register
+ * @shift:    The shift of where the field starts in the L2_MMU_CONFIG register.
+ *            This should match the low bit of @mask (i.e. the second argument
+ *            passed to GENMASK()).
+ */
+struct l2_mmu_config_limit_region {
+	u32 value, mask, shift;
+};
+
+/**
+ * struct l2_mmu_config_limit
+ *
+ * @product_model:    The GPU for which this entry applies
+ * @read:             Values for the read limit field
+ * @write:            Values for the write limit field
+ */
+struct l2_mmu_config_limit {
+	u32 product_model;
+	struct l2_mmu_config_limit_region read;
+	struct l2_mmu_config_limit_region write;
+};
+
+/*
+ * Zero represents no limit.
+ *
+ * For LBEX, TBEX, TTRX and TNAX:
+ *   The value is the number of outstanding reads (6 bits) or writes (5 bits).
+ *
+ * For all other GPUs the value is a fraction; see mali_kbase_config_defaults.h.
+ */
+static const struct l2_mmu_config_limit limits[] = {
+	 /* GPU                       read                  write            */
+	 {GPU_ID2_PRODUCT_LBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} },
+	 {GPU_ID2_PRODUCT_TBEX, {0, GENMASK(10, 5), 5}, {0, GENMASK(16, 12), 12} },
+	 {GPU_ID2_PRODUCT_TTRX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} },
+	 {GPU_ID2_PRODUCT_TNAX, {0, GENMASK(12, 7), 7}, {0, GENMASK(17, 13), 13} },
+	 {GPU_ID2_PRODUCT_TGOX,
+	   {KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
+	   {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
+	 {GPU_ID2_PRODUCT_TNOX,
+	   {KBASE_3BIT_AID_32, GENMASK(14, 12), 12},
+	   {KBASE_3BIT_AID_32, GENMASK(17, 15), 15} },
+};
+
+void kbase_set_mmu_quirks(struct kbase_device *kbdev)
+{
+	/* All older GPUs had 2 bits for both fields; this is the default */
+	struct l2_mmu_config_limit limit = {
+		  0, /* Any GPU not in the limits array defined above */
+		 {KBASE_AID_32, GENMASK(25, 24), 24},
+		 {KBASE_AID_32, GENMASK(27, 26), 26}
+		};
+	u32 product_model, gpu_id;
+	u32 mmu_config;
+	int i;
+
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_model = gpu_id & GPU_ID2_PRODUCT_MODEL;
+
+	/* Limit the GPU bus bandwidth if the platform needs this. */
+	for (i = 0; i < ARRAY_SIZE(limits); i++) {
+		if (product_model == limits[i].product_model) {
+			limit = limits[i];
+			break;
+		}
+	}
+
+	mmu_config = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG));
+
+	mmu_config &= ~(limit.read.mask | limit.write.mask);
+	/* Can't use FIELD_PREP() macro here as the mask isn't constant */
+	mmu_config |= (limit.read.value << limit.read.shift) |
+		      (limit.write.value << limit.write.shift);
+
+	kbdev->hw_quirks_mmu = mmu_config;
+
+	if (kbdev->system_coherency == COHERENCY_ACE) {
+		/* Allow memory configuration disparity to be ignored:
+		 * we optimize the use of shared memory and thus
+		 * expect some disparity in the memory configuration.
+		 */
+		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
+	}
+}
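
The quirk value built by kbase_set_mmu_quirks() is plain mask-and-shift arithmetic over the L2_MMU_CONFIG register contents. A self-contained sketch of the same computation, using the TTRX-style field layout from the table above (read limit in bits [12:7], write limit in bits [17:13]) and a hand-rolled GENMASK32() stand-in for the kernel's GENMASK(), might look like this:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for the kernel's GENMASK() on 32-bit values */
    #define GENMASK32(h, l) (((~0u) >> (31 - (h))) & (~0u << (l)))

    int main(void)
    {
        const uint32_t read_mask   = GENMASK32(12, 7);
        const uint32_t read_shift  = 7;
        const uint32_t read_value  = 0;    /* zero means "no limit" */
        const uint32_t write_mask  = GENMASK32(17, 13);
        const uint32_t write_shift = 13;
        const uint32_t write_value = 0;
        uint32_t mmu_config = 0xffffffffu; /* pretend register contents */

        /* Clear both limit fields, then program the new limits */
        mmu_config &= ~(read_mask | write_mask);
        mmu_config |= (read_value << read_shift) | (write_value << write_shift);

        printf("L2_MMU_CONFIG quirk value: 0x%08x\n", mmu_config);
        return 0;
    }
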
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h
new file mode 100644
index 0000000..25636ee
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_l2_mmu_config.h
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_L2_MMU_CONFIG_H_
+#define _KBASE_L2_MMU_CONFIG_H_
+/**
+ * kbase_set_mmu_quirks - Set the hw_quirks_mmu field of kbdev
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Use this function to initialise the hw_quirks_mmu field, for instance to set
+ * the MAX_READS and MAX_WRITES to sane defaults for each GPU.
+ */
+void kbase_set_mmu_quirks(struct kbase_device *kbdev);
+
+#endif /* _KBASE_L2_MMU_CONFIG_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
index 3e9af77..77e0b78 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,7 +25,7 @@
 #include <mali_kbase.h>
 #include <mali_kbase_mem.h>
 #include <mali_kbase_mmu_hw.h>
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <mali_kbase_as_fault_debugfs.h>
 
@@ -77,7 +77,7 @@
 		val = kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS));
 
 	if (max_loops == 0) {
-		dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+		dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
 		return -1;
 	}
 
@@ -150,6 +150,7 @@
 		struct kbase_as *as;
 		int as_no;
 		struct kbase_context *kctx;
+		struct kbase_fault *fault;
 
 		/*
 		 * the while logic ensures we have a bit set, no need to check
@@ -158,6 +159,12 @@
 		as_no = ffs(bf_bits | pf_bits) - 1;
 		as = &kbdev->as[as_no];
 
+		/* find the fault type */
+		if (bf_bits & (1 << as_no))
+			fault = &as->bf_data;
+		else
+			fault = &as->pf_data;
+
 		/*
 		 * Refcount the kctx ASAP - it shouldn't disappear anyway, since
 		 * Bus/Page faults _should_ only occur whilst jobs are running,
@@ -167,18 +174,15 @@
 		kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
 
 		/* find faulting address */
-		as->fault_addr = kbase_reg_read(kbdev,
-						MMU_AS_REG(as_no,
-							AS_FAULTADDRESS_HI));
-		as->fault_addr <<= 32;
-		as->fault_addr |= kbase_reg_read(kbdev,
-						MMU_AS_REG(as_no,
-							AS_FAULTADDRESS_LO));
-
+		fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTADDRESS_HI));
+		fault->addr <<= 32;
+		fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTADDRESS_LO));
 		/* Mark the fault protected or not */
-		as->protected_mode = kbdev->protected_mode;
+		fault->protected_mode = kbdev->protected_mode;
 
-		if (kbdev->protected_mode && as->fault_addr) {
+		if (kbdev->protected_mode && fault->addr) {
 			/* check if address reporting is allowed */
 			validate_protected_page_fault(kbdev);
 		}
@@ -187,24 +191,18 @@
 		kbase_as_fault_debugfs_new(kbdev, as_no);
 
 		/* record the fault status */
-		as->fault_status = kbase_reg_read(kbdev,
-						  MMU_AS_REG(as_no,
-							AS_FAULTSTATUS));
-
-		/* find the fault type */
-		as->fault_type = (bf_bits & (1 << as_no)) ?
-				KBASE_MMU_FAULT_TYPE_BUS :
-				KBASE_MMU_FAULT_TYPE_PAGE;
+		fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
+				AS_FAULTSTATUS));
 
 		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
-			as->fault_extra_addr = kbase_reg_read(kbdev,
+			fault->extra_addr = kbase_reg_read(kbdev,
 					MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
-			as->fault_extra_addr <<= 32;
-			as->fault_extra_addr |= kbase_reg_read(kbdev,
+			fault->extra_addr <<= 32;
+			fault->extra_addr |= kbase_reg_read(kbdev,
 					MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
 		}
 
-		if (kbase_as_has_bus_fault(as)) {
+		if (kbase_as_has_bus_fault(as, fault)) {
 			/* Mark bus fault as handled.
 			 * Note that a bus fault is processed first in case
 			 * where both a bus fault and page fault occur.
@@ -224,7 +222,7 @@
 
 		/* Process the interrupt for this address space */
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		kbase_mmu_interrupt_process(kbdev, kctx, as);
+		kbase_mmu_interrupt_process(kbdev, kctx, as, fault);
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	}
 
@@ -239,16 +237,20 @@
 void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
 {
 	struct kbase_mmu_setup *current_setup = &as->current_setup;
-	u32 transcfg = 0;
+	u64 transcfg = 0;
 
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU)) {
-		transcfg = current_setup->transcfg & 0xFFFFFFFFUL;
+		transcfg = current_setup->transcfg;
 
 		/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK */
 		/* Clear PTW_MEMATTR bits */
 		transcfg &= ~AS_TRANSCFG_PTW_MEMATTR_MASK;
 		/* Enable correct PTW_MEMATTR bits */
 		transcfg |= AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK;
+		/* Ensure page-tables reads use read-allocate cache-policy in
+		 * the L2
+		 */
+		transcfg |= AS_TRANSCFG_R_ALLOCATE;
 
 		if (kbdev->system_coherency == COHERENCY_ACE) {
 			/* Set flag AS_TRANSCFG_PTW_SH_OS (outer shareable) */
@@ -261,7 +263,7 @@
 		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
 				transcfg);
 		kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
-				(current_setup->transcfg >> 32) & 0xFFFFFFFFUL);
+				(transcfg >> 32) & 0xFFFFFFFFUL);
 	} else {
 		if (kbdev->system_coherency == COHERENCY_ACE)
 			current_setup->transtab |= AS_TRANSTAB_LPAE_SHARE_OUTER;
@@ -277,7 +279,7 @@
 	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
 			(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
 
-	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as,
+	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as,
 			current_setup->transtab,
 			current_setup->memattr,
 			transcfg);
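
The interrupt changes above route every pending fault into the address space's bf_data or pf_data record rather than fields on struct kbase_as itself. The selection is just bit scanning of the combined IRQ status, with bus faults taking priority; a user-space approximation (types are simplified and __builtin_ffs() stands in for the kernel's ffs()) is:

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for the per-address-space fault records */
    struct fault { const char *kind; uint64_t addr; };
    struct as { struct fault pf_data, bf_data; };

    int main(void)
    {
        struct as as_table[16] = { 0 };
        uint32_t pf_bits = 0x04;  /* hypothetical: page fault pending on AS 2 */
        uint32_t bf_bits = 0x08;  /* hypothetical: bus fault pending on AS 3 */

        while (bf_bits | pf_bits) {
            int as_no = __builtin_ffs(bf_bits | pf_bits) - 1;
            struct as *as = &as_table[as_no];
            int is_bus = !!(bf_bits & (1u << as_no));
            /* A bus fault is processed first when both kinds are pending */
            struct fault *fault = is_bus ? &as->bf_data : &as->pf_data;

            fault->kind = is_bus ? "bus" : "page";
            printf("AS %d: %s fault\n", as_no, fault->kind);

            if (is_bus)
                bf_bits &= ~(1u << as_no);
            else
                pf_bits &= ~(1u << as_no);
        }
        return 0;
    }
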
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
index 1f76eed..0a3fa7e 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,8 +29,8 @@
  * register access implementation of the MMU hardware interface
  */
 
-#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
-#define _MALI_KBASE_MMU_HW_DIRECT_H_
+#ifndef _KBASE_MMU_HW_DIRECT_H_
+#define _KBASE_MMU_HW_DIRECT_H_
 
 #include <mali_kbase_defs.h>
 
@@ -39,9 +39,24 @@
  *
  * Process the MMU interrupt that was reported by the &kbase_device.
  *
- * @kbdev:          kbase context to clear the fault from.
- * @irq_stat:       Value of the MMU_IRQ_STATUS register
+ * @kbdev:       Pointer to the kbase device for which the interrupt happened.
+ * @irq_stat:    Value of the MMU_IRQ_STATUS register.
  */
 void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
 
-#endif	/* _MALI_KBASE_MMU_HW_DIRECT_H_ */
+/**
+ * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt.
+ *
+ * Process the bus fault interrupt that was reported for a particular GPU
+ * address space.
+ *
+ * @kbdev:       Pointer to the kbase device for which bus fault was reported.
+ * @status:      Value of the GPU_FAULTSTATUS register.
+ * @as_nr:       GPU address space for which the bus fault occurred.
+ *
+ * Return: zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev,
+		u32 status, u32 as_nr);
+
+#endif	/* _KBASE_MMU_HW_DIRECT_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
index a448a3b..0faf677 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,11 +31,15 @@
 
 #include <mali_kbase_pm.h>
 #include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwcnt_context.h>
 #include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_devfreq.h>
 
 static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data);
+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data);
+static void kbase_pm_gpu_clock_control_worker(struct work_struct *data);
 
 int kbase_pm_runtime_init(struct kbase_device *kbdev)
 {
@@ -128,12 +132,12 @@
 	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_wait_work,
 			kbase_pm_gpu_poweroff_wait_wq);
 
+	kbdev->pm.backend.ca_cores_enabled = ~0ull;
 	kbdev->pm.backend.gpu_powered = false;
 	kbdev->pm.suspending = false;
 #ifdef CONFIG_MALI_DEBUG
 	kbdev->pm.backend.driver_ready_for_irqs = false;
 #endif /* CONFIG_MALI_DEBUG */
-	kbdev->pm.backend.gpu_in_desired_state = true;
 	init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
 
 	/* Initialise the metrics subsystem */
@@ -141,9 +145,6 @@
 	if (ret)
 		return ret;
 
-	init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
-	kbdev->pm.backend.l2_powered = 0;
-
 	init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
 	kbdev->pm.backend.reset_done = false;
 
@@ -151,7 +152,6 @@
 	kbdev->pm.active_count = 0;
 
 	spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
-	spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
 
 	init_waitqueue_head(&kbdev->pm.backend.poweroff_wait);
 
@@ -161,8 +161,59 @@
 	if (kbase_pm_policy_init(kbdev) != 0)
 		goto pm_policy_fail;
 
+	if (kbase_pm_state_machine_init(kbdev) != 0)
+		goto pm_state_machine_fail;
+
+	kbdev->pm.backend.hwcnt_desired = false;
+	kbdev->pm.backend.hwcnt_disabled = true;
+	INIT_WORK(&kbdev->pm.backend.hwcnt_disable_work,
+		kbase_pm_hwcnt_disable_worker);
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	/* At runtime, this feature can be enabled via the insmod module
+	 * parameter; when set it overrides all of the workarounds below.
+	 */
+	if (platform_power_down_only) {
+		kbdev->pm.backend.gpu_clock_slow_down_wa = false;
+		kbdev->pm.backend.l2_always_on = false;
+
+		return 0;
+	}
+
+	if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
+		kbdev->pm.backend.l2_always_on = false;
+		kbdev->pm.backend.gpu_clock_slow_down_wa = false;
+
+		return 0;
+	}
+
+	/* WA1: L2 always_on for GPUs being affected by GPU2017-1336 */
+	if (!IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE)) {
+		kbdev->pm.backend.gpu_clock_slow_down_wa = false;
+		if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336))
+			kbdev->pm.backend.l2_always_on = true;
+		else
+			kbdev->pm.backend.l2_always_on = false;
+
+		return 0;
+	}
+
+	/* WA3: Clock slow down for GPUs being affected by GPU2017-1336 */
+	kbdev->pm.backend.l2_always_on = false;
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2017_1336)) {
+		kbdev->pm.backend.gpu_clock_slow_down_wa = true;
+		kbdev->pm.backend.gpu_clock_suspend_freq = 0;
+		kbdev->pm.backend.gpu_clock_slow_down_desired = true;
+		kbdev->pm.backend.gpu_clock_slowed_down = false;
+		INIT_WORK(&kbdev->pm.backend.gpu_clock_control_work,
+			kbase_pm_gpu_clock_control_worker);
+	} else
+		kbdev->pm.backend.gpu_clock_slow_down_wa = false;
+
 	return 0;
 
+pm_state_machine_fail:
+	kbase_pm_policy_term(kbdev);
 pm_policy_fail:
 	kbase_pm_ca_term(kbdev);
 workq_fail:
@@ -178,6 +229,17 @@
 	 * kbase_pm_clock_off() */
 	kbase_pm_clock_on(kbdev, is_resume);
 
+	if (!is_resume) {
+		unsigned long flags;
+
+		/* Force update of L2 state - if we have abandoned a power off
+		 * then this may be required to power the L2 back on.
+		 */
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_pm_update_state(kbdev);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
 	/* Update core status as required by the policy */
 	kbase_pm_update_cores_state(kbdev);
 
@@ -194,36 +256,23 @@
 	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 	unsigned long flags;
 
-#if !PLATFORM_POWER_DOWN_ONLY
-	/* Wait for power transitions to complete. We do this with no locks held
-	 * so that we don't deadlock with any pending workqueues */
-	kbase_pm_check_transitions_sync(kbdev);
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	if (!platform_power_down_only)
+		/* Wait for power transitions to complete. We do this with no locks held
+		 * so that we don't deadlock with any pending workqueues.
+		 */
+		kbase_pm_wait_for_desired_state(kbdev);
 
 	mutex_lock(&js_devdata->runpool_mutex);
 	mutex_lock(&kbdev->pm.lock);
 
-#if PLATFORM_POWER_DOWN_ONLY
-	if (kbdev->pm.backend.gpu_powered) {
-		if (kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_L2)) {
-			/* If L2 cache is powered then we must flush it before
-			 * we power off the GPU. Normally this would have been
-			 * handled when the L2 was powered off. */
-			kbase_gpu_cacheclean(kbdev);
-		}
-	}
-#endif /* PLATFORM_POWER_DOWN_ONLY */
-
 	if (!backend->poweron_required) {
-#if !PLATFORM_POWER_DOWN_ONLY
 		unsigned long flags;
 
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-		WARN_ON(kbdev->l2_available_bitmap ||
-				kbdev->shader_available_bitmap ||
-				kbdev->tiler_available_bitmap);
+		WARN_ON(backend->shaders_state !=
+					KBASE_SHADERS_OFF_CORESTACK_OFF ||
+			backend->l2_state != KBASE_L2_OFF);
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
 
 		/* Disable interrupts and turn the clock off */
 		if (!kbase_pm_clock_off(kbdev, backend->poweroff_is_suspend)) {
@@ -256,6 +305,8 @@
 	backend->poweroff_wait_in_progress = false;
 	if (backend->poweron_required) {
 		backend->poweron_required = false;
+		kbdev->pm.backend.l2_desired = true;
+		kbase_pm_update_state(kbdev);
 		kbase_pm_update_cores_state_nolock(kbdev);
 		kbase_backend_slot_update(kbdev);
 	}
@@ -267,6 +318,161 @@
 	wake_up(&kbdev->pm.backend.poweroff_wait);
 }
 
+static void kbase_pm_l2_clock_slow(struct kbase_device *kbdev)
+{
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	struct clk *clk = kbdev->clocks[0];
+#endif
+
+	if (!kbdev->pm.backend.gpu_clock_slow_down_wa)
+		return;
+
+	/* No suspend clock is specified */
+	if (WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_suspend_freq))
+		return;
+
+#if defined(CONFIG_MALI_DEVFREQ)
+
+	/* Suspend devfreq */
+	devfreq_suspend_device(kbdev->devfreq);
+
+	/* Keep the current freq to restore it upon resume */
+	kbdev->previous_frequency = kbdev->current_nominal_freq;
+
+	/* Slow down GPU clock to the suspend clock */
+	kbase_devfreq_force_freq(kbdev,
+			kbdev->pm.backend.gpu_clock_suspend_freq);
+
+#elif defined(CONFIG_MALI_MIDGARD_DVFS) /* CONFIG_MALI_DEVFREQ */
+
+	if (WARN_ON_ONCE(!clk))
+		return;
+
+	/* Stop the metrics gathering framework */
+	if (kbase_pm_metrics_is_active(kbdev))
+		kbase_pm_metrics_stop(kbdev);
+
+	/* Keep the current freq to restore it upon resume */
+	kbdev->previous_frequency = clk_get_rate(clk);
+
+	/* Slow down GPU clock to the suspend clock */
+	if (WARN_ON_ONCE(clk_set_rate(clk,
+				kbdev->pm.backend.gpu_clock_suspend_freq)))
+		dev_err(kbdev->dev, "Failed to set suspend freq\n");
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+static void kbase_pm_l2_clock_normalize(struct kbase_device *kbdev)
+{
+#if defined(CONFIG_MALI_MIDGARD_DVFS)
+	struct clk *clk = kbdev->clocks[0];
+#endif
+
+	if (!kbdev->pm.backend.gpu_clock_slow_down_wa)
+		return;
+
+#if defined(CONFIG_MALI_DEVFREQ)
+
+	/* Restore GPU clock to the previous one */
+	kbase_devfreq_force_freq(kbdev, kbdev->previous_frequency);
+
+	/* Resume devfreq */
+	devfreq_resume_device(kbdev->devfreq);
+
+#elif defined(CONFIG_MALI_MIDGARD_DVFS) /* CONFIG_MALI_DEVFREQ */
+
+	if (WARN_ON_ONCE(!clk))
+		return;
+
+	/* Restore GPU clock */
+	if (WARN_ON_ONCE(clk_set_rate(clk, kbdev->previous_frequency)))
+		dev_err(kbdev->dev, "Failed to restore freq (%lu)\n",
+			kbdev->previous_frequency);
+
+	/* Restart the metrics gathering framework */
+	kbase_pm_metrics_start(kbdev);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+}
+
+static void kbase_pm_gpu_clock_control_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_clock_control_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+	bool slow_down = false, normalize = false;
+
+	/* Determine if GPU clock control is required */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (!backend->gpu_clock_slowed_down &&
+			backend->gpu_clock_slow_down_desired) {
+		slow_down = true;
+		backend->gpu_clock_slowed_down = true;
+	} else if (backend->gpu_clock_slowed_down &&
+			!backend->gpu_clock_slow_down_desired) {
+		normalize = true;
+		backend->gpu_clock_slowed_down = false;
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Control GPU clock according to the request of L2 state machine.
+	 * The GPU clock needs to be lowered for safe L2 power down
+	 * and restored to previous speed at L2 power up.
+	 */
+	if (slow_down)
+		kbase_pm_l2_clock_slow(kbdev);
+	else if (normalize)
+		kbase_pm_l2_clock_normalize(kbdev);
+
+	/* Tell the L2 state machine to transition to the next state */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_update_state(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+static void kbase_pm_hwcnt_disable_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+			pm.backend.hwcnt_disable_work);
+	struct kbase_pm_device_data *pm = &kbdev->pm;
+	struct kbase_pm_backend_data *backend = &pm->backend;
+	unsigned long flags;
+
+	bool do_disable;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!do_disable)
+		return;
+
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !backend->hwcnt_desired && !backend->hwcnt_disabled;
+
+	if (do_disable) {
+		/* PM state did not change while we were doing the disable,
+		 * so commit the work we just performed and continue the state
+		 * machine.
+		 */
+		backend->hwcnt_disabled = true;
+		kbase_pm_update_state(kbdev);
+		kbase_backend_slot_update(kbdev);
+	} else {
+		/* PM state was updated while we were doing the disable,
+		 * so we need to undo the disable we just performed.
+		 */
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
 void kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
 {
 	unsigned long flags;
@@ -274,29 +480,31 @@
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	if (!kbdev->pm.backend.poweroff_wait_in_progress) {
-		/* Force all cores off */
-		kbdev->pm.backend.desired_shader_state = 0;
-		kbdev->pm.backend.desired_tiler_state = 0;
 
-		/* Force all cores to be unavailable, in the situation where
-		 * transitions are in progress for some cores but not others,
-		 * and kbase_pm_check_transitions_nolock can not immediately
-		 * power off the cores */
-		kbdev->shader_available_bitmap = 0;
-		kbdev->tiler_available_bitmap = 0;
-		kbdev->l2_available_bitmap = 0;
+	if (!kbdev->pm.backend.gpu_powered)
+		goto unlock_hwaccess;
 
-		kbdev->pm.backend.poweroff_wait_in_progress = true;
-		kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+	if (kbdev->pm.backend.poweroff_wait_in_progress)
+		goto unlock_hwaccess;
 
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		/*Kick off wq here. Callers will have to wait*/
-		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
-				&kbdev->pm.backend.gpu_poweroff_wait_work);
-	} else {
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	}
+	/* Force all cores off */
+	kbdev->pm.backend.shaders_desired = false;
+	kbdev->pm.backend.l2_desired = false;
+
+	kbdev->pm.backend.poweroff_wait_in_progress = true;
+	kbdev->pm.backend.poweroff_is_suspend = is_suspend;
+	kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = true;
+
+	/* l2_desired being false should cause the state machine to
+	 * start powering off the L2. When it actually is powered off,
+	 * the interrupt handler will call kbase_pm_l2_update_state()
+	 * again, which will trigger the kbase_pm_gpu_poweroff_wait_wq.
+	 * Callers of this function will need to wait on poweroff_wait.
+	 */
+	kbase_pm_update_state(kbdev);
+
+unlock_hwaccess:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 static bool is_poweroff_in_progress(struct kbase_device *kbdev)
@@ -316,6 +524,7 @@
 	wait_event_killable(kbdev->pm.backend.poweroff_wait,
 			is_poweroff_in_progress(kbdev));
 }
+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_complete);
 
 int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
 		unsigned int flags)
@@ -341,8 +550,6 @@
 		return ret;
 	}
 
-	kbasep_pm_init_core_use_bitmaps(kbdev);
-
 	kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
 			kbdev->pm.debug_core_mask[1] =
 			kbdev->pm.debug_core_mask[2] =
@@ -363,9 +570,7 @@
 	/* We are ready to receive IRQ's now as power policy is set up, so
 	 * enable them now. */
 #ifdef CONFIG_MALI_DEBUG
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
 	kbdev->pm.backend.driver_ready_for_irqs = true;
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
 #endif
 	kbase_pm_enable_interrupts(kbdev);
 
@@ -374,9 +579,6 @@
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
 
-	/* Idle the GPU and/or cores, if the policy wants it to */
-	kbase_pm_context_idle(kbdev);
-
 	return 0;
 }
 
@@ -385,7 +587,6 @@
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
 	mutex_lock(&kbdev->pm.lock);
-	kbase_pm_cancel_deferred_poweroff(kbdev);
 	kbase_pm_do_poweroff(kbdev, false);
 	mutex_unlock(&kbdev->pm.lock);
 }
@@ -398,7 +599,18 @@
 	KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
 
+	cancel_work_sync(&kbdev->pm.backend.hwcnt_disable_work);
+
+	if (kbdev->pm.backend.hwcnt_disabled) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+	}
+
 	/* Free any resources the policy allocated */
+	kbase_pm_state_machine_term(kbdev);
 	kbase_pm_policy_term(kbdev);
 	kbase_pm_ca_term(kbdev);
 
@@ -410,14 +622,12 @@
 
 void kbase_pm_power_changed(struct kbase_device *kbdev)
 {
-	bool cores_are_available;
 	unsigned long flags;
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+	kbase_pm_update_state(kbdev);
 
-	if (cores_are_available)
-		kbase_backend_slot_update(kbdev);
+	kbase_backend_slot_update(kbdev);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
@@ -455,7 +665,6 @@
 	mutex_lock(&js_devdata->runpool_mutex);
 	mutex_lock(&kbdev->pm.lock);
 
-	kbase_pm_cancel_deferred_poweroff(kbdev);
 	kbase_pm_do_poweroff(kbdev, true);
 
 	kbase_backend_timer_suspend(kbdev);
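
The hwcnt disable worker introduced above follows a check, do-the-slow-work-unlocked, re-check pattern, so that the potentially sleeping counter disable never runs under the hwaccess spinlock. A minimal stand-alone sketch of that pattern, with a pthread mutex standing in for the spinlock and illustrative helper names rather than the driver's, is:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static bool hwcnt_desired;   /* would be set by the PM state machine */
    static bool hwcnt_disabled;  /* committed result of the worker */

    static void slow_disable(void) { /* stands in for the sleeping disable call */ }
    static void undo_disable(void) { /* stands in for re-enabling the counters */ }

    static void hwcnt_disable_worker(void)
    {
        bool do_disable;

        pthread_mutex_lock(&lock);
        do_disable = !hwcnt_desired && !hwcnt_disabled;
        pthread_mutex_unlock(&lock);

        if (!do_disable)
            return;

        slow_disable();  /* potentially sleeping work, done with the lock dropped */

        pthread_mutex_lock(&lock);
        if (!hwcnt_desired && !hwcnt_disabled)
            hwcnt_disabled = true;  /* state unchanged underneath us: commit */
        else
            undo_disable();         /* state changed while unlocked: undo */
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        hwcnt_disable_worker();
        printf("hwcnt_disabled = %d\n", hwcnt_disabled);
        return 0;
    }
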
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
index d4e8e42..41f6429 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -27,18 +27,21 @@
 #include <mali_kbase.h>
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif
 
 int kbase_pm_ca_init(struct kbase_device *kbdev)
 {
-	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
 #ifdef CONFIG_MALI_DEVFREQ
+	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+
 	if (kbdev->current_core_mask)
 		pm_backend->ca_cores_enabled = kbdev->current_core_mask;
 	else
 		pm_backend->ca_cores_enabled =
 				kbdev->gpu_props.props.raw_props.shader_present;
 #endif
-	pm_backend->ca_in_transition = false;
 
 	return 0;
 }
@@ -55,10 +58,17 @@
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
+	if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
+		dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
+				core_mask, kbdev->pm.debug_core_mask_all);
+		goto unlock;
+	}
+
 	pm_backend->ca_cores_enabled = core_mask;
 
-	kbase_pm_update_cores_state_nolock(kbdev);
+	kbase_pm_update_state(kbdev);
 
+unlock:
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
@@ -68,15 +78,12 @@
 
 u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
 {
+#ifdef CONFIG_MALI_DEVFREQ
 	struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+#endif
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	/* All cores must be enabled when instrumentation is in use */
-	if (pm_backend->instr_enabled)
-		return kbdev->gpu_props.props.raw_props.shader_present &
-				kbdev->pm.debug_core_mask_all;
-
 #ifdef CONFIG_MALI_DEVFREQ
 	return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
 #else
@@ -87,21 +94,13 @@
 
 KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
 
-void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbdev->pm.backend.instr_enabled = true;
-
-	kbase_pm_update_cores_state_nolock(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-}
-
-void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
-	kbdev->pm.backend.instr_enabled = false;
 
-	kbase_pm_update_cores_state_nolock(kbdev);
+#ifdef CONFIG_MALI_NO_MALI
+	return (((1ull) << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1);
+#else
+	return kbdev->pm.backend.pm_shaders_core_mask;
+#endif
 }
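
The check added to kbase_devfreq_set_core_mask() above refuses an OPP core mask that has no overlap with the debug core mask before committing it. In isolation the test is a simple mask intersection; the values in this sketch are made up:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t debug_core_mask_all = 0x0f; /* hypothetical: cores 0-3 usable */
        const uint64_t opp_core_mask       = 0xf0; /* hypothetical OPP mask: cores 4-7 */

        if (!(opp_core_mask & debug_core_mask_all)) {
            printf("OPP core mask 0x%llx does not intersect with debug mask 0x%llx\n",
                   (unsigned long long)opp_core_mask,
                   (unsigned long long)debug_core_mask_all);
            return 1;  /* the driver keeps the previous mask in this case */
        }

        printf("new core mask accepted: 0x%llx\n",
               (unsigned long long)opp_core_mask);
        return 0;
    }
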
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
index 2b005c9..5423e96 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -75,23 +75,15 @@
 						u64 cores_transitioning);
 
 /**
- * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ * kbase_pm_ca_get_instr_core_mask - Get the PM state sync-ed shaders core mask
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
- * This overrides the output of the core availability policy, ensuring that all
- * cores are available
- */
-void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ * Returns a mask of the PM state synchronised shader cores for arranging
+ * HW performance counter dumps
  *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * This disables any previously enabled override, and resumes normal policy
- * functionality
+ * Return: The bit mask of PM state synchronised cores
  */
-void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+u64 kbase_pm_ca_get_instr_core_mask(struct kbase_device *kbdev);
 
 #endif /* _KBASE_PM_CA_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
index 7fe8eb3..d7dc63a 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,10 +29,8 @@
 
 #include "mali_kbase_pm_always_on.h"
 #include "mali_kbase_pm_coarse_demand.h"
-#include "mali_kbase_pm_demand.h"
 #if !MALI_CUSTOMER_RELEASE
-#include "mali_kbase_pm_demand_always_powered.h"
-#include "mali_kbase_pm_fast_start.h"
+#include "mali_kbase_pm_always_on_demand.h"
 #endif
 
 /* Forward definition - see mali_kbase.h */
@@ -40,6 +38,11 @@
 struct kbase_jd_atom;
 
 /**
+ * Maximum number of PM policies that may be active on a device.
+ */
+#define KBASE_PM_MAX_NUM_POLICIES (10)
+
+/**
  * enum kbase_pm_core_type - The types of core in a GPU.
  *
  * These enumerated values are used in calls to
@@ -65,6 +68,71 @@
 };
 
 /**
+ * enum kbase_l2_core_state - The states used for the L2 cache & tiler power
+ *                            state machine.
+ *
+ * @KBASE_L2_OFF: The L2 cache and tiler are off
+ * @KBASE_L2_PEND_ON: The L2 cache and tiler are powering on
+ * @KBASE_L2_RESTORE_CLOCKS: The GPU clock is restored. Conditionally used.
+ * @KBASE_L2_ON_HWCNT_ENABLE: The L2 cache and tiler are on, and hwcnt is being
+ *                            enabled
+ * @KBASE_L2_ON: The L2 cache and tiler are on, and hwcnt is enabled
+ * @KBASE_L2_ON_HWCNT_DISABLE: The L2 cache and tiler are on, and hwcnt is being
+ *                             disabled
+ * @KBASE_L2_SLOW_DOWN_CLOCKS: The GPU clock is set to appropriate or lowest
+ *                             clock. Conditionally used.
+ * @KBASE_L2_POWER_DOWN: The L2 cache and tiler are about to be powered off
+ * @KBASE_L2_PEND_OFF: The L2 cache and tiler are powering off
+ * @KBASE_L2_RESET_WAIT: The GPU is resetting, L2 cache and tiler power state
+ *                       are unknown
+ */
+enum kbase_l2_core_state {
+#define KBASEP_L2_STATE(n) KBASE_L2_ ## n,
+#include "mali_kbase_pm_l2_states.h"
+#undef KBASEP_L2_STATE
+};
+
+/**
+ * enum kbase_shader_core_state - The states used for the shaders' state machine.
+ *
+ * @KBASE_SHADERS_OFF_CORESTACK_OFF: The shaders and core stacks are off
+ * @KBASE_SHADERS_OFF_CORESTACK_PEND_ON: The shaders are off, core stacks have
+ *                                       been requested to power on and hwcnt
+ *                                       is being disabled
+ * @KBASE_SHADERS_PEND_ON_CORESTACK_ON: Core stacks are on, shaders have been
+ *                                      requested to power on.
+ * @KBASE_SHADERS_ON_CORESTACK_ON: The shaders and core stacks are on, and
+ *                                 hwcnt is already enabled.
+ * @KBASE_SHADERS_ON_CORESTACK_ON_RECHECK: The shaders and core stacks
+ *                                      are on, hwcnt is disabled, and the
+ *                                      state machine is re-checking whether
+ *                                      to power down or re-enable hwcnt.
+ * @KBASE_SHADERS_WAIT_OFF_CORESTACK_ON: The shaders have been requested to
+ *                                       power off, but they remain on for the
+ *                                       duration of the hysteresis timer
+ * @KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON: The hysteresis timer has expired
+ * @KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON: The core stacks are on and the
+ *                                          level 2 cache is being flushed.
+ * @KBASE_SHADERS_READY_OFF_CORESTACK_ON: The core stacks are on and the shaders
+ *                                        are ready to be powered off.
+ * @KBASE_SHADERS_PEND_OFF_CORESTACK_ON: The core stacks are on, and the shaders
+ *                                       have been requested to power off
+ * @KBASE_SHADERS_OFF_CORESTACK_PEND_OFF: The shaders are off, and the core stacks
+ *                                        have been requested to power off
+ * @KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF: Shaders and corestacks are
+ *                                                  off, but the tick timer
+ *                                                  cancellation is still
+ *                                                  pending.
+ * @KBASE_SHADERS_RESET_WAIT: The GPU is resetting, shader and core stack power
+ *                            states are unknown
+ */
+enum kbase_shader_core_state {
+#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
+#include "mali_kbase_pm_shader_states.h"
+#undef KBASEP_SHADER_STATE
+};
+
+/**
  * struct kbasep_pm_metrics - Metrics data collected for use by the power
  *                            management framework.
  *
@@ -94,8 +162,7 @@
  *           not. Updated when the job scheduler informs us a job in submitted
  *           or removed from a GPU slot.
  *  @active_cl_ctx: number of CL jobs active on the GPU. Array is per-device.
- *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot. As
- *           GL jobs never run on slot 2 this slot is not recorded.
+ *  @active_gl_ctx: number of GL jobs active on the GPU. Array is per-slot.
  *  @lock: spinlock protecting the kbasep_pm_metrics_data structure
  *  @platform_data: pointer to data controlled by platform specific code
  *  @kbdev: pointer to kbase device for which metrics are collected
@@ -112,7 +179,7 @@
 	ktime_t time_period_start;
 	bool gpu_active;
 	u32 active_cl_ctx[2];
-	u32 active_gl_ctx[2]; /* GL jobs can only run on 2 of the 3 job slots */
+	u32 active_gl_ctx[3];
 	spinlock_t lock;
 
 	void *platform_data;
@@ -128,13 +195,39 @@
 #endif
 };
 
+/**
+ * struct kbasep_pm_tick_timer_state - State for the shader hysteresis timer
+ * @wq: Work queue used to wait for the timer to be stopped
+ * @work: Work item which cancels the timer
+ * @timer: Timer for powering off the shader cores
+ * @configured_interval: Period of GPU poweroff timer
+ * @configured_ticks: User-configured number of ticks to wait after the shader
+ *                    power down request is received before turning off the cores
+ * @remaining_ticks: Number of remaining timer ticks until shaders are powered off
+ * @cancel_queued: True if the cancellation work item has been queued. This is
+ *                 required to ensure that it is not queued twice, e.g. after
+ *                 a reset, which could cause the timer to be incorrectly
+ *                 cancelled later by a delayed workitem.
+ * @needed: Whether the timer should restart itself
+ */
+struct kbasep_pm_tick_timer_state {
+	struct workqueue_struct *wq;
+	struct work_struct work;
+	struct hrtimer timer;
+
+	ktime_t configured_interval;
+	unsigned int configured_ticks;
+	unsigned int remaining_ticks;
+
+	bool cancel_queued;
+	bool needed;
+};
+
 union kbase_pm_policy_data {
 	struct kbasep_pm_policy_always_on always_on;
 	struct kbasep_pm_policy_coarse_demand coarse_demand;
-	struct kbasep_pm_policy_demand demand;
 #if !MALI_CUSTOMER_RELEASE
-	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
-	struct kbasep_pm_policy_fast_start fast_start;
+	struct kbasep_pm_policy_always_on_demand always_on_demand;
 #endif
 };
 
@@ -147,71 +240,35 @@
  * @pm_current_policy: The policy that is currently actively controlling the
  *                     power state.
  * @pm_policy_data:    Private data for current PM policy
- * @ca_in_transition:  Flag indicating when core availability policy is
- *                     transitioning cores. The core availability policy must
- *                     set this when a change in core availability is occurring.
- *                     power_change_lock must be held when accessing this.
  * @reset_done:        Flag when a reset is complete
  * @reset_done_wait:   Wait queue to wait for changes to @reset_done
- * @l2_powered_wait:   Wait queue for whether the l2 cache has been powered as
- *                     requested
- * @l2_powered:        State indicating whether all the l2 caches are powered.
- *                     Non-zero indicates they're *all* powered
- *                     Zero indicates that some (or all) are not powered
  * @gpu_cycle_counter_requests: The reference count of active gpu cycle counter
  *                              users
  * @gpu_cycle_counter_requests_lock: Lock to protect @gpu_cycle_counter_requests
- * @desired_shader_state: A bit mask identifying the shader cores that the
- *                        power policy would like to be on. The current state
- *                        of the cores may be different, but there should be
- *                        transitions in progress that will eventually achieve
- *                        this state (assuming that the policy doesn't change
- *                        its mind in the mean time).
- * @powering_on_shader_state: A bit mask indicating which shader cores are
- *                            currently in a power-on transition
- * @desired_tiler_state: A bit mask identifying the tiler cores that the power
- *                       policy would like to be on. See @desired_shader_state
- * @powering_on_tiler_state: A bit mask indicating which tiler core are
- *                           currently in a power-on transition
- * @powering_on_l2_state: A bit mask indicating which l2-caches are currently
- *                        in a power-on transition
- * @powering_on_stack_state: A bit mask indicating which core stacks are
- *                           currently in a power-on transition
- * @gpu_in_desired_state: This flag is set if the GPU is powered as requested
- *                        by the desired_xxx_state variables
- * @gpu_in_desired_state_wait: Wait queue set when @gpu_in_desired_state != 0
+ * @gpu_in_desired_state_wait: Wait queue set when the GPU is in the desired
+ *                             state according to the L2 and shader power state
+ *                             machines
  * @gpu_powered:       Set to true when the GPU is powered and register
- *                     accesses are possible, false otherwise
- * @instr_enabled:     Set to true when instrumentation is enabled,
- *                     false otherwise
+ *                     accesses are possible, false otherwise. Access to this
+ *                     variable should be protected by: both the hwaccess_lock
+ *                     spinlock and the pm.lock mutex for writes; or at least
+ *                     one of either lock for reads.
+ * @pm_shaders_core_mask: Shader PM state synchronised shaders core mask. It
+ *                     holds the cores enabled in a hardware counters dump,
+ *                     and may differ from @shaders_avail when under different
+ *                     states and transitions.
  * @cg1_disabled:      Set if the policy wants to keep the second core group
  *                     powered off
  * @driver_ready_for_irqs: Debug state indicating whether sufficient
  *                         initialization of the driver has occurred to handle
  *                         IRQs
- * @gpu_powered_lock:  Spinlock that must be held when writing @gpu_powered or
- *                     accessing @driver_ready_for_irqs
  * @metrics:           Structure to hold metrics for the GPU
- * @gpu_poweroff_pending: number of poweroff timer ticks until the GPU is
- *                        powered off
- * @shader_poweroff_pending_time: number of poweroff timer ticks until shaders
- *                        and/or timers are powered off
- * @gpu_poweroff_timer: Timer for powering off GPU
- * @gpu_poweroff_wq:   Workqueue to power off GPU on when timer fires
- * @gpu_poweroff_work: Workitem used on @gpu_poweroff_wq
- * @shader_poweroff_pending: Bit mask of shaders to be powered off on next
- *                           timer callback
- * @tiler_poweroff_pending: Bit mask of tilers to be powered off on next timer
- *                          callback
- * @poweroff_timer_needed: true if the poweroff timer is currently required,
- *                         false otherwise
- * @poweroff_timer_running: true if the poweroff timer is currently running,
- *                          false otherwise
- *                          power_change_lock should be held when accessing,
- *                          unless there is no way the timer can be running (eg
- *                          hrtimer_cancel() was called immediately before)
+ * @shader_tick_timer: Structure to hold the shader poweroff tick timer state
  * @poweroff_wait_in_progress: true if a wait for GPU power off is in progress.
  *                             hwaccess_lock must be held when accessing
+ * @invoke_poweroff_wait_wq_when_l2_off: flag indicating that the L2 power state
+ *                                       machine should invoke the poweroff
+ *                                       worker after the L2 has turned off.
  * @poweron_required: true if a GPU power on is required. Should only be set
  *                    when poweroff_wait_in_progress is true, and therefore the
  *                    GPU can not immediately be powered on. pm.lock must be
@@ -236,40 +293,75 @@
  * @callback_power_runtime_idle: Optional callback when the GPU may be idle. See
  *                              &struct kbase_pm_callback_conf
  * @ca_cores_enabled: Cores that are currently available
+ * @l2_state:     The current state of the L2 cache state machine. See
+ *                &enum kbase_l2_core_state
+ * @l2_desired:   True if the L2 cache should be powered on by the L2 cache state
+ *                machine
+ * @l2_always_on: If true, disable powering down of l2 cache.
+ * @shaders_state: The current state of the shader state machine.
+ * @shaders_avail: This is updated by the state machine when it is in a state
+ *                 where it can handle changes to the core availability. This
+ *                 is internal to the shader state machine and should *not* be
+ *                 modified elsewhere.
+ * @shaders_desired: True if the PM active count or power policy requires the
+ *                   shader cores to be on. This is used as an input to the
+ *                   shader power state machine.  The current state of the
+ *                   cores may be different, but there should be transitions in
+ *                   progress that will eventually achieve this state (assuming
+ *                   that the policy doesn't change its mind in the mean time).
+ * @in_reset: True if a GPU is resetting and normal power manager operation is
+ *            suspended
+ * @protected_entry_transition_override : True if GPU reset is being used
+ *                                  before entering the protected mode and so
+ *                                  the reset handling behaviour is being
+ *                                  overridden.
+ * @protected_transition_override : True if a protected mode transition is in
+ *                                  progress and is overriding power manager
+ *                                  behaviour.
+ * @protected_l2_override : Non-zero if the L2 cache is required during a
+ *                          protected mode transition. Has no effect if not
+ *                          transitioning.
+ * @hwcnt_desired: True if we want GPU hardware counters to be enabled.
+ * @hwcnt_disabled: True if GPU hardware counters are not enabled.
+ * @hwcnt_disable_work: Work item to disable GPU hardware counters, used if
+ *                      atomic disable is not possible.
+ * @gpu_clock_suspend_freq: 'opp-mali-errata-1485982' clock in opp table
+ *                          for safe L2 power cycle.
+ *                          If no opp-mali-errata-1485982 entry is specified,
+ *                          the slowest clock will be used.
+ * @gpu_clock_slow_down_wa: If true, slow down GPU clock during L2 power cycle.
+ * @gpu_clock_slow_down_desired: True if a lower GPU clock is wanted for a
+ *                             safe L2 power cycle, false if the GPU clock
+ *                             should be restored to its normal speed. This is
+ *                             updated only in the L2 state machine,
+ *                             kbase_pm_l2_update_state.
+ * @gpu_clock_slowed_down: During an L2 power cycle, true if the GPU clock has
+ *                         been set to a lower frequency for safe L2 power
+ *                         down, false if the GPU clock has been restored to
+ *                         its previous speed. This is updated only in the
+ *                         work function, kbase_pm_gpu_clock_control_worker.
+ * @gpu_clock_control_work: work item to set GPU clock during L2 power cycle
+ *                          using gpu_clock_control
  *
  * Note:
  * During an IRQ, @pm_current_policy can be NULL when the policy is being
  * changed with kbase_pm_set_policy(). The change is protected under
  * kbase_device.pm.power_change_lock. Direct access to this from IRQ context
  * must therefore check for NULL. If NULL, then kbase_pm_set_policy() will
  * re-issue the policy functions that would have been done under IRQ.
  */
 struct kbase_pm_backend_data {
 	const struct kbase_pm_policy *pm_current_policy;
 	union kbase_pm_policy_data pm_policy_data;
-	bool ca_in_transition;
 	bool reset_done;
 	wait_queue_head_t reset_done_wait;
-	wait_queue_head_t l2_powered_wait;
-	int l2_powered;
 	int gpu_cycle_counter_requests;
 	spinlock_t gpu_cycle_counter_requests_lock;
 
-	u64 desired_shader_state;
-	u64 powering_on_shader_state;
-	u64 desired_tiler_state;
-	u64 powering_on_tiler_state;
-	u64 powering_on_l2_state;
-#ifdef CONFIG_MALI_CORESTACK
-	u64 powering_on_stack_state;
-#endif /* CONFIG_MALI_CORESTACK */
-
-	bool gpu_in_desired_state;
 	wait_queue_head_t gpu_in_desired_state_wait;
 
 	bool gpu_powered;
 
-	bool instr_enabled;
+	u64 pm_shaders_core_mask;
 
 	bool cg1_disabled;
 
@@ -277,25 +369,12 @@
 	bool driver_ready_for_irqs;
 #endif /* CONFIG_MALI_DEBUG */
 
-	spinlock_t gpu_powered_lock;
-
-
 	struct kbasep_pm_metrics_state metrics;
 
-	int gpu_poweroff_pending;
-	int shader_poweroff_pending_time;
-
-	struct hrtimer gpu_poweroff_timer;
-	struct workqueue_struct *gpu_poweroff_wq;
-	struct work_struct gpu_poweroff_work;
-
-	u64 shader_poweroff_pending;
-	u64 tiler_poweroff_pending;
-
-	bool poweroff_timer_needed;
-	bool poweroff_timer_running;
+	struct kbasep_pm_tick_timer_state shader_tick_timer;
 
 	bool poweroff_wait_in_progress;
+	bool invoke_poweroff_wait_wq_when_l2_off;
 	bool poweron_required;
 	bool poweroff_is_suspend;
 
@@ -312,25 +391,46 @@
 	void (*callback_power_runtime_off)(struct kbase_device *kbdev);
 	int (*callback_power_runtime_idle)(struct kbase_device *kbdev);
 
-#ifdef CONFIG_MALI_DEVFREQ
 	u64 ca_cores_enabled;
-#endif
+
+	enum kbase_l2_core_state l2_state;
+	enum kbase_shader_core_state shaders_state;
+	u64 shaders_avail;
+	bool l2_desired;
+	bool l2_always_on;
+	bool shaders_desired;
+
+	bool in_reset;
+
+	bool protected_entry_transition_override;
+	bool protected_transition_override;
+	int protected_l2_override;
+
+	bool hwcnt_desired;
+	bool hwcnt_disabled;
+	struct work_struct hwcnt_disable_work;
+
+	u64 gpu_clock_suspend_freq;
+	bool gpu_clock_slow_down_wa;
+	bool gpu_clock_slow_down_desired;
+	bool gpu_clock_slowed_down;
+	struct work_struct gpu_clock_control_work;
 };
 
 
 /* List of policy IDs */
 enum kbase_pm_policy_id {
-	KBASE_PM_POLICY_ID_DEMAND = 1,
-	KBASE_PM_POLICY_ID_ALWAYS_ON,
 	KBASE_PM_POLICY_ID_COARSE_DEMAND,
 #if !MALI_CUSTOMER_RELEASE
-	KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
-	KBASE_PM_POLICY_ID_FAST_START
+	KBASE_PM_POLICY_ID_ALWAYS_ON_DEMAND,
 #endif
+	KBASE_PM_POLICY_ID_ALWAYS_ON
 };
 
 typedef u32 kbase_pm_policy_flags;
 
+#define KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY (1u)
+
 /**
  * struct kbase_pm_policy - Power policy structure.
  *
@@ -377,13 +477,8 @@
 	/**
 	 * Function called to find out if shader cores are needed
 	 *
-	 * This needs to at least satisfy kbdev->shader_needed_cnt, and so must
-	 * never return false when kbdev->shader_needed_cnt > 0.
-	 *
-	 * Note that kbdev->pm.active_count being 0 is not a good indicator
-	 * that kbdev->shader_needed_cnt is also 0 - refer to the documentation
-	 * on the active_count member in struct kbase_pm_device_data and
-	 * kbase_pm_is_active().
+	 * This needs to at least satisfy kbdev->pm.backend.shaders_desired,
+	 * and so must never return false when shaders_desired is true.
 	 *
 	 * @kbdev: The kbase device structure for the device (must be a
 	 *         valid pointer)
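
The kbase_l2_core_state and kbase_shader_core_state enums earlier in this header are generated with an X-macro: the list of states lives in mali_kbase_pm_l2_states.h and mali_kbase_pm_shader_states.h (not shown in this excerpt) and is expanded once per consumer. The same pattern can also produce a matching table of state names for debugfs or trace output. The sketch below keeps the list in a local macro purely so that it is self-contained; the state names are made up:

    #include <stdio.h>

    /* Stand-in for a states header: one entry per state */
    #define L2_STATE_LIST \
        L2_STATE(OFF) \
        L2_STATE(PEND_ON) \
        L2_STATE(ON) \
        L2_STATE(PEND_OFF)

    /* First expansion: the enum itself */
    enum l2_state {
    #define L2_STATE(n) L2_ ## n,
        L2_STATE_LIST
    #undef L2_STATE
    };

    /* Second expansion: matching name strings, e.g. for debugfs output */
    static const char *const l2_state_names[] = {
    #define L2_STATE(n) #n,
        L2_STATE_LIST
    #undef L2_STATE
    };

    int main(void)
    {
        enum l2_state s = L2_PEND_ON;

        printf("state %d = %s\n", (int)s, l2_state_names[s]);
        return 0;
    }
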
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
index cdd5cf7..92d3818 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -29,27 +29,43 @@
 #include <mali_kbase.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_midg_regmap.h>
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-#include <mali_kbase_gator.h>
-#endif
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_config_defaults.h>
 #include <mali_kbase_smc.h>
 #include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_hwcnt_context.h>
 #include <backend/gpu/mali_kbase_cache_policy_backend.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_l2_mmu_config.h>
 
 #include <linux/of.h>
 
-#if MALI_MOCK_TEST
-#define MOCKABLE(function) function##_original
+#ifdef CONFIG_MALI_CORESTACK
+bool corestack_driver_control = true;
 #else
-#define MOCKABLE(function) function
-#endif				/* MALI_MOCK_TEST */
+bool corestack_driver_control; /* Default value of 0/false */
+#endif
+module_param(corestack_driver_control, bool, 0444);
+MODULE_PARM_DESC(corestack_driver_control,
+		"Let the driver power on/off the GPU core stack independently "
+		"without involving the Power Domain Controller. This should "
+		"only be enabled on platforms for which integration of the PDC "
+		"to the Mali GPU is known to be problematic.");
+KBASE_EXPORT_TEST_API(corestack_driver_control);
+
+#ifdef CONFIG_MALI_PLATFORM_POWER_DOWN_ONLY
+bool platform_power_down_only = true;
+#else
+bool platform_power_down_only; /* Default value of 0/false */
+#endif
+module_param(platform_power_down_only, bool, 0444);
+MODULE_PARM_DESC(platform_power_down_only,
+		"Disable power down of individual cores.");
 
 /**
  * enum kbasep_pm_action - Actions that can be performed on a core.
@@ -79,6 +95,88 @@
 		enum kbase_pm_core_type core_type,
 		enum kbasep_pm_action action);
 
+static bool kbase_pm_is_l2_desired(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.backend.protected_entry_transition_override)
+		return false;
+
+	if (kbdev->pm.backend.protected_transition_override &&
+			kbdev->pm.backend.protected_l2_override)
+		return true;
+
+	if (kbdev->pm.backend.protected_transition_override &&
+			!kbdev->pm.backend.shaders_desired)
+		return false;
+
+	return kbdev->pm.backend.l2_desired;
+}
+
+void kbase_pm_protected_override_enable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.protected_transition_override = true;
+}
+void kbase_pm_protected_override_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbdev->pm.backend.protected_transition_override = false;
+}
+
+int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->protected_mode_transition);
+
+	if (kbdev->pm.backend.l2_always_on &&
+	    (kbdev->system_coherency == COHERENCY_ACE)) {
+		WARN_ON(kbdev->pm.backend.protected_entry_transition_override);
+
+		/*
+		 * If there is already a GPU reset pending then wait for it to
+		 * complete before initiating a special reset for protected
+		 * mode entry.
+		 */
+		if (kbase_reset_gpu_silent(kbdev))
+			return -EAGAIN;
+
+		kbdev->pm.backend.protected_entry_transition_override = true;
+	}
+
+	return 0;
+}
+
+void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	WARN_ON(!kbdev->protected_mode_transition);
+
+	if (kbdev->pm.backend.l2_always_on &&
+	    (kbdev->system_coherency == COHERENCY_ACE)) {
+		WARN_ON(!kbdev->pm.backend.protected_entry_transition_override);
+
+		kbdev->pm.backend.protected_entry_transition_override = false;
+	}
+}
+
+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (override) {
+		kbdev->pm.backend.protected_l2_override++;
+		WARN_ON(kbdev->pm.backend.protected_l2_override <= 0);
+	} else {
+		kbdev->pm.backend.protected_l2_override--;
+		WARN_ON(kbdev->pm.backend.protected_l2_override < 0);
+	}
+
+	kbase_pm_update_state(kbdev);
+}
+
 /**
  * core_type_to_reg - Decode a core type and action to a register.
  *
@@ -96,24 +194,24 @@
 static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
 						enum kbasep_pm_action action)
 {
-#ifdef CONFIG_MALI_CORESTACK
-	if (core_type == KBASE_PM_CORE_STACK) {
-		switch (action) {
-		case ACTION_PRESENT:
-			return STACK_PRESENT_LO;
-		case ACTION_READY:
-			return STACK_READY_LO;
-		case ACTION_PWRON:
-			return STACK_PWRON_LO;
-		case ACTION_PWROFF:
-			return STACK_PWROFF_LO;
-		case ACTION_PWRTRANS:
-			return STACK_PWRTRANS_LO;
-		default:
-			BUG();
+	if (corestack_driver_control) {
+		if (core_type == KBASE_PM_CORE_STACK) {
+			switch (action) {
+			case ACTION_PRESENT:
+				return STACK_PRESENT_LO;
+			case ACTION_READY:
+				return STACK_READY_LO;
+			case ACTION_PWRON:
+				return STACK_PWRON_LO;
+			case ACTION_PWROFF:
+				return STACK_PWROFF_LO;
+			case ACTION_PWRTRANS:
+				return STACK_PWRTRANS_LO;
+			default:
+				WARN(1, "Invalid action for core type\n");
+			}
 		}
 	}
-#endif /* CONFIG_MALI_CORESTACK */
 
 	return (u32)core_type + (u32)action;
 }
@@ -170,19 +268,17 @@
 	u32 lo = cores & 0xFFFFFFFF;
 	u32 hi = (cores >> 32) & 0xFFFFFFFF;
 
+	/* When 'platform_power_down_only' is enabled, no core type should be
+	 * turned off individually.
+	 */
+	KBASE_DEBUG_ASSERT(!(action == ACTION_PWROFF &&
+			platform_power_down_only));
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	reg = core_type_to_reg(core_type, action);
 
 	KBASE_DEBUG_ASSERT(reg);
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-	if (cores) {
-		if (action == ACTION_PWRON)
-			kbase_trace_mali_pm_power_on(core_type, cores);
-		else if (action == ACTION_PWROFF)
-			kbase_trace_mali_pm_power_off(core_type, cores);
-	}
-#endif
 
 	if (cores) {
 		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
@@ -191,7 +287,7 @@
 			state |= cores;
 		else if (action == ACTION_PWROFF)
 			state &= ~cores;
-		KBASE_TLSTREAM_AUX_PM_STATE(core_type, state);
+		KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state);
 	}
 
 	/* Tracing */
@@ -272,16 +368,6 @@
 	return (((u64) hi) << 32) | ((u64) lo);
 }
 
-void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev)
-{
-	kbdev->shader_available_bitmap = 0;
-	kbdev->tiler_available_bitmap = 0;
-	kbdev->l2_users_count = 0;
-	kbdev->l2_available_bitmap = 0;
-	kbdev->tiler_needed_cnt = 0;
-	kbdev->shader_needed_cnt = 0;
-}
-
 /**
  * kbase_pm_get_present_cores - Get the cores that are present
  *
@@ -385,525 +471,990 @@
 
 KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
 
-/**
- * kbase_pm_transition_core_type - Perform power transitions for a particular
- *                                 core type.
- *
- * This function will perform any available power transitions to make the actual
- * hardware state closer to the desired state. If a core is currently
- * transitioning then changes to the power state of that call cannot be made
- * until the transition has finished. Cores which are not present in the
- * hardware are ignored if they are specified in the desired_state bitmask,
- * however the return value will always be 0 in this case.
- *
- * @kbdev:             The kbase device
- * @type:              The core type to perform transitions for
- * @desired_state:     A bit mask of the desired state of the cores
- * @in_use:            A bit mask of the cores that are currently running
- *                     jobs. These cores have to be kept powered up because
- *                     there are jobs running (or about to run) on them.
- * @available:         Receives a bit mask of the cores that the job
- *                     scheduler can use to submit jobs to. May be NULL if
- *                     this is not needed.
- * @powering_on:       Bit mask to update with cores that are
- *                    transitioning to a power-on state.
- *
- * Return: true if the desired state has been reached, false otherwise
- */
-static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
-						enum kbase_pm_core_type type,
-						u64 desired_state,
-						u64 in_use,
-						u64 * const available,
-						u64 *powering_on)
+static void kbase_pm_trigger_hwcnt_disable(struct kbase_device *kbdev)
 {
-	u64 present;
-	u64 ready;
-	u64 trans;
-	u64 powerup;
-	u64 powerdown;
-	u64 powering_on_trans;
-	u64 desired_state_in_use;
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	/* Get current state */
-	present = kbase_pm_get_present_cores(kbdev, type);
-	trans = kbase_pm_get_trans_cores(kbdev, type);
-	ready = kbase_pm_get_ready_cores(kbdev, type);
-
-	/* mask off ready from trans in case transitions finished between the
-	 * register reads */
-	trans &= ~ready;
-
-	powering_on_trans = trans & *powering_on;
-
-	if (available != NULL)
-		*available = (ready | powering_on_trans) & desired_state;
-
-	if (trans) /* Do not progress if any cores are transitioning */
-		return false;
-
-	*powering_on = powering_on_trans;
-
-	/* Update desired state to include the in-use cores. These have to be
-	 * kept powered up because there are jobs running or about to run on
-	 * these cores
+	/* See if we can get away with disabling hwcnt
+	 * atomically, otherwise kick off a worker.
 	 */
-	desired_state_in_use = desired_state | in_use;
-
-	/* Update state of whether l2 caches are powered */
-	if (type == KBASE_PM_CORE_L2) {
-		if ((ready == present) && (desired_state_in_use == ready) &&
-								(trans == 0)) {
-			/* All are ready, none will be turned off, and none are
-			 * transitioning */
-			kbdev->pm.backend.l2_powered = 1;
-			/*
-			 * Ensure snoops are enabled after L2 is powered up,
-			 * note that kbase keeps track of the snoop state, so
-			 * safe to repeatedly call.
-			 */
-			kbase_pm_cache_snoop_enable(kbdev);
-			if (kbdev->l2_users_count > 0) {
-				/* Notify any registered l2 cache users
-				 * (optimized out when no users waiting) */
-				wake_up(&kbdev->pm.backend.l2_powered_wait);
-			}
-		} else
-			kbdev->pm.backend.l2_powered = 0;
-	}
-
-	if (desired_state == ready && (trans == 0))
-		return true;
-
-	/* Restrict the cores to those that are actually present */
-	powerup = desired_state_in_use & present;
-	powerdown = (~desired_state_in_use) & present;
-
-	/* Restrict to cores that are not already in the desired state */
-	powerup &= ~ready;
-	powerdown &= ready;
-
-	/* Don't transition any cores that are already transitioning, except for
-	 * Mali cores that support the following case:
-	 *
-	 * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
-	 * a core that is currently transitioning to power off, then this is
-	 * remembered and the shader core is automatically powered up again once
-	 * the original transition completes. Once the automatic power on is
-	 * complete any job scheduled on the shader core should start.
-	 */
-	powerdown &= ~trans;
-
-	if (kbase_hw_has_feature(kbdev,
-				BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
-		if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
-			trans = powering_on_trans; /* for exception cases, only
-						    * mask off cores in power on
-						    * transitions */
-
-	powerup &= ~trans;
-
-	/* Perform transitions if any */
-	kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
-#if !PLATFORM_POWER_DOWN_ONLY
-	kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+	if (kbase_hwcnt_context_disable_atomic(kbdev->hwcnt_gpu_ctx)) {
+		backend->hwcnt_disabled = true;
+	} else {
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+		queue_work(system_wq,
+			&backend->hwcnt_disable_work);
+#else
+		queue_work(system_highpri_wq,
+			&backend->hwcnt_disable_work);
 #endif
-
-	/* Recalculate cores transitioning on, and re-evaluate our state */
-	powering_on_trans |= powerup;
-	*powering_on = powering_on_trans;
-	if (available != NULL)
-		*available = (ready | powering_on_trans) & desired_state;
-
-	return false;
+	}
 }
 
-KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
-
-/**
- * get_desired_cache_status - Determine which caches should be on for a
- *                            particular core state
- *
- * This function takes a bit mask of the present caches and the cores (or
- * caches) that are attached to the caches that will be powered. It then
- * computes which caches should be turned on to allow the cores requested to be
- * powered up.
- *
- * @present:       The bit mask of present caches
- * @cores_powered: A bit mask of cores (or L2 caches) that are desired to
- *                 be powered
- * @tilers_powered: The bit mask of tilers that are desired to be powered
- *
- * Return: A bit mask of the caches that should be turned on
- */
-static u64 get_desired_cache_status(u64 present, u64 cores_powered,
-		u64 tilers_powered)
+static void kbase_pm_l2_config_override(struct kbase_device *kbdev)
 {
-	u64 desired = 0;
+	u32 val;
 
-	while (present) {
-		/* Find out which is the highest set bit */
-		u64 bit = fls64(present) - 1;
-		u64 bit_mask = 1ull << bit;
-		/* Create a mask which has all bits from 'bit' upwards set */
+	/*
+	 * Skip if the L2_CONFIG register is not supported
+	 */
+	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG))
+		return;
 
-		u64 mask = ~(bit_mask - 1);
+	/*
+	 * Skip if size and hash are not given explicitly,
+	 * which means default values are used.
+	 */
+	if ((kbdev->l2_size_override == 0) && (kbdev->l2_hash_override == 0))
+		return;
 
-		/* If there are any cores powered at this bit or above (that
-		 * haven't previously been processed) then we need this core on
+	val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));
+
+	if (kbdev->l2_size_override) {
+		val &= ~L2_CONFIG_SIZE_MASK;
+		val |= (kbdev->l2_size_override << L2_CONFIG_SIZE_SHIFT);
+	}
+
+	if (kbdev->l2_hash_override) {
+		val &= ~L2_CONFIG_HASH_MASK;
+		val |= (kbdev->l2_hash_override << L2_CONFIG_HASH_SHIFT);
+	}
+
+	dev_dbg(kbdev->dev, "Program 0x%x to L2_CONFIG\n", val);
+
+	/* Write L2_CONFIG to override */
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val);
+}
+
+static void kbase_pm_control_gpu_clock(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *const backend = &kbdev->pm.backend;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	queue_work(system_wq, &backend->gpu_clock_control_work);
+}
+
+static const char *kbase_l2_core_state_to_string(enum kbase_l2_core_state state)
+{
+	const char *const strings[] = {
+#define KBASEP_L2_STATE(n) #n,
+#include "mali_kbase_pm_l2_states.h"
+#undef KBASEP_L2_STATE
+	};
+	if (WARN_ON((size_t)state >= ARRAY_SIZE(strings)))
+		return "Bad level 2 cache state";
+	else
+		return strings[state];
+}
+
+static u64 kbase_pm_l2_update_state(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present;
+	u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
+	enum kbase_l2_core_state prev_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	do {
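+		/* Loop until no state change is made in a pass; states that
+		 * must wait on hardware simply leave the state unchanged.
+		 */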
+		/* Get current state */
+		u64 l2_trans = kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_L2);
+		u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
+				KBASE_PM_CORE_L2);
+		u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
+				KBASE_PM_CORE_TILER);
+		u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
+				KBASE_PM_CORE_TILER);
+
+		/* mask off ready from trans in case transitions finished
+		 * between the register reads
 		 */
-		if (cores_powered & mask)
-			desired |= bit_mask;
+		l2_trans &= ~l2_ready;
+		tiler_trans &= ~tiler_ready;
 
-		/* Remove bits from cores_powered and present */
-		cores_powered &= ~mask;
-		present &= ~bit_mask;
-	}
+		prev_state = backend->l2_state;
 
-	/* Power up the required L2(s) for the tiler */
-	if (tilers_powered)
-		desired |= 1;
+		switch (backend->l2_state) {
+		case KBASE_L2_OFF:
+			if (kbase_pm_is_l2_desired(kbdev)) {
+				/*
+				 * Set the desired config for L2 before powering
+				 * it on
+				 */
+				kbase_pm_l2_config_override(kbdev);
 
-	return desired;
-}
+				/* The L2 is required, so power it on. Powering
+				 * on the tiler will also power the first L2
+				 * cache.
+				 */
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_TILER,
+						tiler_present, ACTION_PWRON);
 
-KBASE_EXPORT_TEST_API(get_desired_cache_status);
+				/* If we have more than one L2 cache then we
+				 * must power them on explicitly.
+				 */
+				if (l2_present != 1)
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+							l2_present & ~1,
+							ACTION_PWRON);
+				backend->l2_state = KBASE_L2_PEND_ON;
+			}
+			break;
 
-#ifdef CONFIG_MALI_CORESTACK
-u64 kbase_pm_core_stack_mask(u64 cores)
-{
-	u64 stack_mask = 0;
-	size_t const MAX_CORE_ID = 31;
-	size_t const NUM_CORES_PER_STACK = 4;
-	size_t i;
+		case KBASE_L2_PEND_ON:
+			if (!l2_trans && l2_ready == l2_present && !tiler_trans
+					&& tiler_ready == tiler_present) {
+				KBASE_TRACE_ADD(kbdev,
+						PM_CORES_CHANGE_AVAILABLE_TILER,
+						NULL, NULL, 0u,
+						(u32)tiler_ready);
+				/*
+				 * Ensure snoops are enabled after L2 is powered
+				 * up. Note that kbase keeps track of the snoop
+				 * state, so safe to repeatedly call.
+				 */
+				kbase_pm_cache_snoop_enable(kbdev);
 
-	for (i = 0; i <= MAX_CORE_ID; ++i) {
-		if (test_bit(i, (unsigned long *)&cores)) {
-			/* Every core which ID >= 16 is filled to stacks 4-7
-			 * instead of 0-3 */
-			size_t const stack_num = (i >= 16) ?
-				(i % NUM_CORES_PER_STACK) + 4 :
-				(i % NUM_CORES_PER_STACK);
-			set_bit(stack_num, (unsigned long *)&stack_mask);
+				/* With the L2 enabled, we can now enable
+				 * hardware counters.
+				 */
+				if (kbdev->pm.backend.gpu_clock_slow_down_wa)
+					backend->l2_state =
+						KBASE_L2_RESTORE_CLOCKS;
+				else
+					backend->l2_state =
+						KBASE_L2_ON_HWCNT_ENABLE;
+
+				/* Now that the L2 is on, the shaders can start
+				 * powering on if they're required. The obvious
+				 * way to do this would be to call
+				 * kbase_pm_shaders_update_state() here.
+				 * However, that would make the two state
+				 * machines mutually recursive, as the opposite
+				 * would be needed for powering down. Instead,
+				 * callers of this function should use the
+				 * kbase_pm_update_state() wrapper, which will
+				 * call the shader state machine immediately
+				 * after the L2 (for power up), or
+				 * automatically re-invoke the L2 state machine
+				 * when the shaders power down.
+				 */
+			}
+			break;
+
+		case KBASE_L2_RESTORE_CLOCKS:
+			/* Only GPUs affected by BASE_HW_ISSUE_GPU2017_1336 are
+			 * expected to reach this state.
+			 */
+			WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa);
+
+			/* If the L2 is no longer needed, make sure any
+			 * previously issued work to restore the GPU clock is
+			 * cancelled, by moving to the KBASE_L2_SLOW_DOWN_CLOCKS
+			 * state.
+			 */
+			if (!kbase_pm_is_l2_desired(kbdev)) {
+				backend->l2_state = KBASE_L2_SLOW_DOWN_CLOCKS;
+				break;
+			}
+
+			backend->gpu_clock_slow_down_desired = false;
+			if (backend->gpu_clock_slowed_down)
+				kbase_pm_control_gpu_clock(kbdev);
+			else
+				backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE;
+			break;
+
+		case KBASE_L2_ON_HWCNT_ENABLE:
+			backend->hwcnt_desired = true;
+			if (backend->hwcnt_disabled) {
+				kbase_hwcnt_context_enable(
+					kbdev->hwcnt_gpu_ctx);
+				backend->hwcnt_disabled = false;
+			}
+			backend->l2_state = KBASE_L2_ON;
+			break;
+
+		case KBASE_L2_ON:
+			if (!kbase_pm_is_l2_desired(kbdev)) {
+				/* Do not power off L2 until the shaders and
+				 * core stacks are off.
+				 */
+				if (backend->shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+					break;
+
+				/* We need to make sure hardware counters are
+				 * disabled before powering down the L2, to
+				 * prevent loss of data.
+				 *
+				 * We waited until after the cores were powered
+				 * down to prevent ping-ponging between hwcnt
+				 * enabled and disabled, which would have
+				 * happened if userspace submitted more work
+				 * while we were trying to power down.
+				 */
+				backend->l2_state = KBASE_L2_ON_HWCNT_DISABLE;
+			}
+			break;
+
+		case KBASE_L2_ON_HWCNT_DISABLE:
+			/* If the L2 became desired while we were waiting on the
+			 * worker to do the actual hwcnt disable (which might
+			 * happen if some work was submitted immediately after
+			 * the shaders powered off), then we need to early-out
+			 * of this state and re-enable hwcnt.
+			 *
+			 * If we get lucky, the hwcnt disable might not have
+			 * actually started yet, and the logic in the hwcnt
+			 * enable state will prevent the worker from
+			 * performing the disable entirely, preventing loss of
+			 * any hardware counter data.
+			 *
+			 * If the hwcnt disable has started, then we'll lose
+			 * a tiny amount of hardware counter data between the
+			 * disable and the re-enable occurring.
+			 *
+			 * This loss of data is preferable to the alternative,
+			 * which is to block the shader cores from doing any
+			 * work until we're sure hwcnt has been re-enabled.
+			 */
+			if (kbase_pm_is_l2_desired(kbdev)) {
+				backend->l2_state = KBASE_L2_ON_HWCNT_ENABLE;
+				break;
+			}
+
+			backend->hwcnt_desired = false;
+			if (!backend->hwcnt_disabled) {
+				kbase_pm_trigger_hwcnt_disable(kbdev);
+			}
+
+			if (backend->hwcnt_disabled) {
+				if (kbdev->pm.backend.gpu_clock_slow_down_wa)
+					backend->l2_state =
+						KBASE_L2_SLOW_DOWN_CLOCKS;
+				else
+					backend->l2_state = KBASE_L2_POWER_DOWN;
+			}
+			break;
+
+		case KBASE_L2_SLOW_DOWN_CLOCKS:
+			/* Only GPUs affected by BASE_HW_ISSUE_GPU2017_1336 are
+			 * expected to reach this state.
+			 */
+			WARN_ON_ONCE(!kbdev->pm.backend.gpu_clock_slow_down_wa);
+
+			/* If the L2 needs to be powered up again, make sure any
+			 * previously issued work to slow down the GPU clock is
+			 * cancelled, by moving to the KBASE_L2_RESTORE_CLOCKS
+			 * state.
+			 */
+			if (kbase_pm_is_l2_desired(kbdev)) {
+				backend->l2_state = KBASE_L2_RESTORE_CLOCKS;
+				break;
+			}
+
+			backend->gpu_clock_slow_down_desired = true;
+			if (!backend->gpu_clock_slowed_down)
+				kbase_pm_control_gpu_clock(kbdev);
+			else
+				backend->l2_state = KBASE_L2_POWER_DOWN;
+
+			break;
+
+		case KBASE_L2_POWER_DOWN:
+			if (!platform_power_down_only && !backend->l2_always_on)
+				/* Powering off the L2 will also power off the
+				 * tiler.
+				 */
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+						l2_present,
+						ACTION_PWROFF);
+			else
+				/* If L2 cache is powered then we must flush it
+				 * before we power off the GPU. Normally this
+				 * would have been handled when the L2 was
+				 * powered off.
+				 */
+				kbase_gpu_start_cache_clean_nolock(
+						kbdev);
+
+			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+					NULL, NULL, 0u, 0u);
+
+			backend->l2_state = KBASE_L2_PEND_OFF;
+			break;
+
+		case KBASE_L2_PEND_OFF:
+			if (!platform_power_down_only && !backend->l2_always_on) {
+				/* We only need to check the L2 here - if the L2
+				 * is off then the tiler is definitely also off.
+				 */
+				if (!l2_trans && !l2_ready)
+					/* L2 is now powered off */
+					backend->l2_state = KBASE_L2_OFF;
+			} else {
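+				/* Wait for the cache flush started in
+				 * KBASE_L2_POWER_DOWN to finish before
+				 * treating the L2 as off.
+				 */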
+				if (!kbdev->cache_clean_in_progress)
+					backend->l2_state = KBASE_L2_OFF;
+			}
+			break;
+
+		case KBASE_L2_RESET_WAIT:
+			/* Reset complete  */
+			if (!backend->in_reset)
+				backend->l2_state = KBASE_L2_OFF;
+			break;
+
+		default:
+			WARN(1, "Invalid state in l2_state: %d",
+					backend->l2_state);
 		}
+
+		if (backend->l2_state != prev_state)
+			dev_dbg(kbdev->dev, "L2 state transition: %s to %s\n",
+				kbase_l2_core_state_to_string(prev_state),
+				kbase_l2_core_state_to_string(
+					backend->l2_state));
+
+	} while (backend->l2_state != prev_state);
+
+	if (kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off &&
+			backend->l2_state == KBASE_L2_OFF) {
+		kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off = false;
+		queue_work(kbdev->pm.backend.gpu_poweroff_wait_wq,
+				&kbdev->pm.backend.gpu_poweroff_wait_work);
 	}
 
-	return stack_mask;
+	if (backend->l2_state == KBASE_L2_ON)
+		return l2_present;
+	return 0;
 }
-#endif /* CONFIG_MALI_CORESTACK */
 
-bool
-MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+static void shader_poweroff_timer_stop_callback(struct work_struct *data)
 {
-	bool cores_are_available = false;
-	bool in_desired_state = true;
-	u64 desired_l2_state;
-#ifdef CONFIG_MALI_CORESTACK
-	u64 desired_stack_state;
-	u64 stacks_powered;
-#endif /* CONFIG_MALI_CORESTACK */
-	u64 cores_powered;
-	u64 tilers_powered;
-	u64 tiler_available_bitmap;
-	u64 tiler_transitioning_bitmap;
-	u64 shader_available_bitmap;
-	u64 shader_ready_bitmap;
-	u64 shader_transitioning_bitmap;
-	u64 l2_available_bitmap;
-	u64 prev_l2_available_bitmap;
-	u64 l2_inuse_bitmap;
+	unsigned long flags;
+	struct kbasep_pm_tick_timer_state *stt = container_of(data,
+			struct kbasep_pm_tick_timer_state, work);
+	struct kbase_device *kbdev = container_of(stt, struct kbase_device,
+			pm.backend.shader_tick_timer);
 
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	hrtimer_cancel(&stt->timer);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	stt->cancel_queued = false;
+	if (kbdev->pm.backend.gpu_powered)
+		kbase_pm_update_state(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/**
+ * shader_poweroff_timer_queue_cancel - cancel the shader poweroff tick timer
+ * @kbdev:      pointer to kbase device
+ *
+ * Synchronization between the shader state machine and the timer thread is
+ * difficult. This is because situations may arise where the state machine
+ * wants to start the timer, but the callback is already running, and has
+ * already passed the point at which it checks whether it is required, and so
+ * cancels itself, even though the state machine may have just tried to call
+ * hrtimer_start.
+ *
+ * This cannot be stopped by holding hwaccess_lock in the timer thread,
+ * because there are still infinitesimally small sections at the start and end
+ * of the callback where the lock is not held.
+ *
+ * Instead, a new state is added to the shader state machine,
+ * KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF. This is used to guarantee
+ * that when the shaders are switched off, the timer has definitely been
+ * cancelled. As a result, when KBASE_SHADERS_ON_CORESTACK_ON is left and the
+ * timer is started, it is guaranteed that either the timer is already running
+ * (from an availability change or cancelled timer), or hrtimer_start will
+ * succeed. It is critical to avoid ending up in
+ * KBASE_SHADERS_WAIT_OFF_CORESTACK_ON without the timer running, or it could
+ * hang there forever.
+ */
+static void shader_poweroff_timer_queue_cancel(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_tick_timer_state *stt =
+			&kbdev->pm.backend.shader_tick_timer;
+
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	spin_lock(&kbdev->pm.backend.gpu_powered_lock);
-	if (kbdev->pm.backend.gpu_powered == false) {
-		spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
-		if (kbdev->pm.backend.desired_shader_state == 0 &&
-				kbdev->pm.backend.desired_tiler_state == 0)
-			return true;
-		return false;
+	stt->needed = false;
+
+	if (hrtimer_active(&stt->timer) && !stt->cancel_queued) {
+		stt->cancel_queued = true;
+		queue_work(stt->wq, &stt->work);
 	}
+}
 
-	/* If any cores are already powered then, we must keep the caches on */
-	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
-							KBASE_PM_CORE_SHADER);
-	cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
-	cores_powered |= kbdev->pm.backend.desired_shader_state;
+static const char *kbase_shader_core_state_to_string(
+	enum kbase_shader_core_state state)
+{
+	const char *const strings[] = {
+#define KBASEP_SHADER_STATE(n) #n,
+#include "mali_kbase_pm_shader_states.h"
+#undef KBASEP_SHADER_STATE
+	};
+	if (WARN_ON((size_t)state >= ARRAY_SIZE(strings)))
+		return "Bad shader core state";
+	else
+		return strings[state];
+}
 
-#ifdef CONFIG_MALI_CORESTACK
-	/* Work out which core stacks want to be powered */
-	desired_stack_state = kbase_pm_core_stack_mask(cores_powered);
-	stacks_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK) |
-		desired_stack_state;
-#endif /* CONFIG_MALI_CORESTACK */
+static void kbase_pm_shaders_update_state(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	struct kbasep_pm_tick_timer_state *stt =
+			&kbdev->pm.backend.shader_tick_timer;
+	enum kbase_shader_core_state prev_state;
+	u64 stacks_avail = 0;
 
-	/* Work out which tilers want to be powered */
-	tiler_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
-							KBASE_PM_CORE_TILER);
-	tilers_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_TILER);
-	tilers_powered |= kbdev->pm.backend.desired_tiler_state;
+	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	/* If there are l2 cache users registered, keep all l2s powered even if
-	 * all other cores are off. */
-	if (kbdev->l2_users_count > 0)
-		cores_powered |= kbdev->gpu_props.props.raw_props.l2_present;
+	if (corestack_driver_control)
+		/* Always power on all the corestacks. Disabling certain
+		 * corestacks when their respective shaders are not in the
+		 * available bitmap is not currently supported.
+		 */
+		stacks_avail = kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_STACK);
 
-	desired_l2_state = get_desired_cache_status(
-			kbdev->gpu_props.props.raw_props.l2_present,
-			cores_powered, tilers_powered);
+	do {
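+		/* As with the L2 state machine, loop until a pass makes no
+		 * further state change.
+		 */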
+		u64 shaders_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_SHADER);
+		u64 shaders_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+		u64 stacks_trans = 0;
+		u64 stacks_ready = 0;
 
-	l2_inuse_bitmap = get_desired_cache_status(
-			kbdev->gpu_props.props.raw_props.l2_present,
-			cores_powered | shader_transitioning_bitmap,
-			tilers_powered | tiler_transitioning_bitmap);
+		if (corestack_driver_control) {
+			stacks_trans = kbase_pm_get_trans_cores(kbdev, KBASE_PM_CORE_STACK);
+			stacks_ready = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_STACK);
+		}
 
-#ifdef CONFIG_MALI_CORESTACK
-	if (stacks_powered)
-		desired_l2_state |= 1;
-#endif /* CONFIG_MALI_CORESTACK */
+		/* mask off ready from trans in case transitions finished
+		 * between the register reads
+		 */
+		shaders_trans &= ~shaders_ready;
+		stacks_trans &= ~stacks_ready;
 
-	/* If any l2 cache is on, then enable l2 #0, for use by job manager */
-	if (0 != desired_l2_state)
-		desired_l2_state |= 1;
+		prev_state = backend->shaders_state;
 
-	prev_l2_available_bitmap = kbdev->l2_available_bitmap;
-	in_desired_state &= kbase_pm_transition_core_type(kbdev,
-			KBASE_PM_CORE_L2, desired_l2_state, l2_inuse_bitmap,
-			&l2_available_bitmap,
-			&kbdev->pm.backend.powering_on_l2_state);
+		switch (backend->shaders_state) {
+		case KBASE_SHADERS_OFF_CORESTACK_OFF:
+			/* Changes to the shader core availability are only
+			 * handled at certain points, i.e. here (off) and in
+			 * SHADERS_ON_CORESTACK_ON.
+			 */
+			backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+			backend->pm_shaders_core_mask = 0;
 
-	kbdev->l2_available_bitmap = l2_available_bitmap;
+			if (backend->shaders_desired &&
+				backend->l2_state == KBASE_L2_ON) {
+				if (backend->hwcnt_desired &&
+					!backend->hwcnt_disabled) {
+					/* Trigger a hwcounter dump */
+					backend->hwcnt_desired = false;
+					kbase_pm_trigger_hwcnt_disable(kbdev);
+				}
 
+				if (backend->hwcnt_disabled) {
+					if (corestack_driver_control) {
+						kbase_pm_invoke(kbdev,
+							KBASE_PM_CORE_STACK,
+							stacks_avail,
+							ACTION_PWRON);
+					}
+					backend->shaders_state =
+						KBASE_SHADERS_OFF_CORESTACK_PEND_ON;
+				}
+			}
+			break;
 
-#ifdef CONFIG_MALI_CORESTACK
-	if (in_desired_state) {
-		in_desired_state &= kbase_pm_transition_core_type(kbdev,
-				KBASE_PM_CORE_STACK, desired_stack_state, 0,
-				&kbdev->stack_available_bitmap,
-				&kbdev->pm.backend.powering_on_stack_state);
-	}
-#endif /* CONFIG_MALI_CORESTACK */
+		case KBASE_SHADERS_OFF_CORESTACK_PEND_ON:
+			if (!stacks_trans && stacks_ready == stacks_avail) {
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+						backend->shaders_avail, ACTION_PWRON);
 
-	if (in_desired_state) {
-		in_desired_state &= kbase_pm_transition_core_type(kbdev,
-				KBASE_PM_CORE_TILER,
-				kbdev->pm.backend.desired_tiler_state,
-				0, &tiler_available_bitmap,
-				&kbdev->pm.backend.powering_on_tiler_state);
-		in_desired_state &= kbase_pm_transition_core_type(kbdev,
-				KBASE_PM_CORE_SHADER,
-				kbdev->pm.backend.desired_shader_state,
-				0, &shader_available_bitmap,
-				&kbdev->pm.backend.powering_on_shader_state);
+				backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON;
+			}
+			break;
 
-		if (kbdev->shader_available_bitmap != shader_available_bitmap)
-			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
-						NULL, 0u,
-						(u32) shader_available_bitmap);
+		case KBASE_SHADERS_PEND_ON_CORESTACK_ON:
+			if (!shaders_trans && shaders_ready == backend->shaders_avail) {
+				KBASE_TRACE_ADD(kbdev,
+						PM_CORES_CHANGE_AVAILABLE,
+						NULL, NULL, 0u, (u32)shaders_ready);
+				backend->pm_shaders_core_mask = shaders_ready;
+				backend->hwcnt_desired = true;
+				if (backend->hwcnt_disabled) {
+					kbase_hwcnt_context_enable(
+						kbdev->hwcnt_gpu_ctx);
+					backend->hwcnt_disabled = false;
+				}
+				backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON;
+			}
+			break;
 
-		kbdev->shader_available_bitmap = shader_available_bitmap;
+		case KBASE_SHADERS_ON_CORESTACK_ON:
+			backend->shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
 
-		if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
-			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
-						NULL, NULL, 0u,
-						(u32) tiler_available_bitmap);
+			/* If the shaders are about to change state, the
+			 * hardware counters must be disabled first.
+			 */
+			if (!backend->shaders_desired ||
+				(backend->shaders_avail & ~shaders_ready)) {
+				backend->hwcnt_desired = false;
+				if (!backend->hwcnt_disabled)
+					kbase_pm_trigger_hwcnt_disable(kbdev);
+				backend->shaders_state =
+					KBASE_SHADERS_ON_CORESTACK_ON_RECHECK;
+			}
+			break;
 
-		kbdev->tiler_available_bitmap = tiler_available_bitmap;
+		case KBASE_SHADERS_ON_CORESTACK_ON_RECHECK:
+			backend->shaders_avail =
+				kbase_pm_ca_get_core_mask(kbdev);
 
-	} else if ((l2_available_bitmap &
-			kbdev->gpu_props.props.raw_props.tiler_present) !=
-			kbdev->gpu_props.props.raw_props.tiler_present) {
-		tiler_available_bitmap = 0;
+			if (!backend->hwcnt_disabled) {
+				/* Wait for the hardware counters to be disabled */
+				;
+			} else if (!backend->shaders_desired) {
+				if (kbdev->pm.backend.protected_transition_override ||
+						!stt->configured_ticks ||
+						WARN_ON(stt->cancel_queued)) {
+					backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+				} else {
+					stt->remaining_ticks = stt->configured_ticks;
+					stt->needed = true;
 
-		kbdev->tiler_available_bitmap = tiler_available_bitmap;
-	}
+					/* The shader hysteresis timer is not
+					 * done the obvious way, which would be
+					 * to start an hrtimer when the shader
+					 * power off is requested. Instead,
+					 * use a 'tick' timer, and set the
+					 * remaining number of ticks on a power
+					 * off request.  This avoids the
+					 * latency of starting, then
+					 * immediately cancelling an hrtimer
+					 * when the shaders are re-requested
+					 * before the timeout expires.
+					 */
+					if (!hrtimer_active(&stt->timer))
+						hrtimer_start(&stt->timer,
+								stt->configured_interval,
+								HRTIMER_MODE_REL);
 
-	/* State updated for slow-path waiters */
-	kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+					backend->shaders_state = KBASE_SHADERS_WAIT_OFF_CORESTACK_ON;
+				}
+			} else {
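+				/* Shaders are still desired: if the core mask
+				 * now includes cores that are not yet ready,
+				 * power them on, then wait for them in
+				 * PEND_ON_CORESTACK_ON.
+				 */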
+				if (backend->shaders_avail & ~shaders_ready) {
+					backend->shaders_avail |= shaders_ready;
 
-	shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
-							KBASE_PM_CORE_SHADER);
-	shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
-							KBASE_PM_CORE_SHADER);
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+							backend->shaders_avail & ~shaders_ready,
+							ACTION_PWRON);
+				}
+				backend->shaders_state = KBASE_SHADERS_PEND_ON_CORESTACK_ON;
+			}
+			break;
 
-	/* Determine whether the cores are now available (even if the set of
-	 * available cores is empty). Note that they can be available even if
-	 * we've not finished transitioning to the desired state */
-	if ((kbdev->shader_available_bitmap &
-					kbdev->pm.backend.desired_shader_state)
-				== kbdev->pm.backend.desired_shader_state &&
-		(kbdev->tiler_available_bitmap &
-					kbdev->pm.backend.desired_tiler_state)
-				== kbdev->pm.backend.desired_tiler_state) {
-		cores_are_available = true;
+		case KBASE_SHADERS_WAIT_OFF_CORESTACK_ON:
+			if (WARN_ON(!hrtimer_active(&stt->timer))) {
+				stt->remaining_ticks = 0;
+				backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+			}
 
-		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
-				(u32)(kbdev->shader_available_bitmap &
-				kbdev->pm.backend.desired_shader_state));
-		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
-				(u32)(kbdev->tiler_available_bitmap &
-				kbdev->pm.backend.desired_tiler_state));
-	}
+			if (backend->shaders_desired) {
+				stt->remaining_ticks = 0;
+				backend->shaders_state = KBASE_SHADERS_ON_CORESTACK_ON_RECHECK;
+			} else if (stt->remaining_ticks == 0) {
+				backend->shaders_state = KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON;
+			}
+			break;
 
-	if (in_desired_state) {
-		KBASE_DEBUG_ASSERT(cores_are_available);
+		case KBASE_SHADERS_WAIT_FINISHED_CORESTACK_ON:
+			shader_poweroff_timer_queue_cancel(kbdev);
+			if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_921)) {
+				kbase_gpu_start_cache_clean_nolock(kbdev);
+				backend->shaders_state =
+					KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON;
+			} else {
+				backend->shaders_state =
+					KBASE_SHADERS_READY_OFF_CORESTACK_ON;
+			}
+			break;
 
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
-						kbase_pm_get_ready_cores(kbdev,
-							KBASE_PM_CORE_L2));
-		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
-						kbase_pm_get_ready_cores(kbdev,
-							KBASE_PM_CORE_SHADER));
-		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
-						kbase_pm_get_ready_cores(kbdev,
-							KBASE_PM_CORE_TILER));
-#ifdef CONFIG_MALI_CORESTACK
-		kbase_trace_mali_pm_status(KBASE_PM_CORE_STACK,
-						kbase_pm_get_ready_cores(kbdev,
-							KBASE_PM_CORE_STACK));
-#endif /* CONFIG_MALI_CORESTACK */
-#endif
+		case KBASE_SHADERS_L2_FLUSHING_CORESTACK_ON:
+			if (!kbdev->cache_clean_in_progress)
+				backend->shaders_state =
+					KBASE_SHADERS_READY_OFF_CORESTACK_ON;
 
+			break;
+
+		case KBASE_SHADERS_READY_OFF_CORESTACK_ON:
+			if (!platform_power_down_only)
+				kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER,
+						shaders_ready, ACTION_PWROFF);
+
+			KBASE_TRACE_ADD(kbdev,
+					PM_CORES_CHANGE_AVAILABLE,
+					NULL, NULL, 0u, 0u);
+
+			backend->shaders_state = KBASE_SHADERS_PEND_OFF_CORESTACK_ON;
+			break;
+
+		case KBASE_SHADERS_PEND_OFF_CORESTACK_ON:
+			if ((!shaders_trans && !shaders_ready) || platform_power_down_only) {
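+				/* With platform_power_down_only the cores are
+				 * never powered off individually, so there is
+				 * no power-off transition to wait for.
+				 */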
+				if (corestack_driver_control && !platform_power_down_only)
+					kbase_pm_invoke(kbdev, KBASE_PM_CORE_STACK,
+							stacks_avail, ACTION_PWROFF);
+
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_PEND_OFF;
+			}
+			break;
+
+		case KBASE_SHADERS_OFF_CORESTACK_PEND_OFF:
+			if ((!stacks_trans && !stacks_ready) ||
+				platform_power_down_only) {
+				/* The cores are now powered off, so re-enable
+				 * the hardware counters.
+				 */
+				backend->pm_shaders_core_mask = 0;
+				backend->hwcnt_desired = true;
+				if (backend->hwcnt_disabled) {
+					kbase_hwcnt_context_enable(
+						kbdev->hwcnt_gpu_ctx);
+					backend->hwcnt_disabled = false;
+				}
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
+			}
+			break;
+
+		case KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF:
+			if (!hrtimer_active(&stt->timer) && !stt->cancel_queued)
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF;
+			break;
+
+		case KBASE_SHADERS_RESET_WAIT:
+			/* Reset complete */
+			if (!backend->in_reset)
+				backend->shaders_state = KBASE_SHADERS_OFF_CORESTACK_OFF_TIMER_PEND_OFF;
+			break;
+		}
+
+		if (backend->shaders_state != prev_state)
+			dev_dbg(kbdev->dev, "Shader state transition: %s to %s\n",
+				kbase_shader_core_state_to_string(prev_state),
+				kbase_shader_core_state_to_string(
+					backend->shaders_state));
+
+	} while (backend->shaders_state != prev_state);
+}
+
+static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
+{
+	bool in_desired_state = true;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (kbase_pm_is_l2_desired(kbdev) &&
+			kbdev->pm.backend.l2_state != KBASE_L2_ON)
+		in_desired_state = false;
+	else if (!kbase_pm_is_l2_desired(kbdev) &&
+			kbdev->pm.backend.l2_state != KBASE_L2_OFF)
+		in_desired_state = false;
+
+	if (kbdev->pm.backend.shaders_desired &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON)
+		in_desired_state = false;
+	else if (!kbdev->pm.backend.shaders_desired &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+		in_desired_state = false;
+
+	return in_desired_state;
+}
+
+static bool kbase_pm_is_in_desired_state(struct kbase_device *kbdev)
+{
+	bool in_desired_state;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	in_desired_state = kbase_pm_is_in_desired_state_nolock(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return in_desired_state;
+}
+
+static bool kbase_pm_is_in_desired_state_with_l2_powered(
+		struct kbase_device *kbdev)
+{
+	bool in_desired_state = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	if (kbase_pm_is_in_desired_state_nolock(kbdev) &&
+			(kbdev->pm.backend.l2_state == KBASE_L2_ON))
+		in_desired_state = true;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return in_desired_state;
+}
+
+static void kbase_pm_trace_power_state(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	KBASE_TLSTREAM_AUX_PM_STATE(
+			kbdev,
+			KBASE_PM_CORE_L2,
+			kbase_pm_get_ready_cores(
+				kbdev, KBASE_PM_CORE_L2));
+	KBASE_TLSTREAM_AUX_PM_STATE(
+			kbdev,
+			KBASE_PM_CORE_SHADER,
+			kbase_pm_get_ready_cores(
+				kbdev, KBASE_PM_CORE_SHADER));
+	KBASE_TLSTREAM_AUX_PM_STATE(
+			kbdev,
+			KBASE_PM_CORE_TILER,
+			kbase_pm_get_ready_cores(
+				kbdev,
+				KBASE_PM_CORE_TILER));
+
+	if (corestack_driver_control)
 		KBASE_TLSTREAM_AUX_PM_STATE(
-				KBASE_PM_CORE_L2,
-				kbase_pm_get_ready_cores(
-					kbdev, KBASE_PM_CORE_L2));
-		KBASE_TLSTREAM_AUX_PM_STATE(
-				KBASE_PM_CORE_SHADER,
-				kbase_pm_get_ready_cores(
-					kbdev, KBASE_PM_CORE_SHADER));
-		KBASE_TLSTREAM_AUX_PM_STATE(
-				KBASE_PM_CORE_TILER,
-				kbase_pm_get_ready_cores(
-					kbdev,
-					KBASE_PM_CORE_TILER));
-#ifdef CONFIG_MALI_CORESTACK
-		KBASE_TLSTREAM_AUX_PM_STATE(
+				kbdev,
 				KBASE_PM_CORE_STACK,
 				kbase_pm_get_ready_cores(
 					kbdev,
 					KBASE_PM_CORE_STACK));
-#endif /* CONFIG_MALI_CORESTACK */
+}
 
+void kbase_pm_update_state(struct kbase_device *kbdev)
+{
+	enum kbase_shader_core_state prev_shaders_state =
+			kbdev->pm.backend.shaders_state;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	if (!kbdev->pm.backend.gpu_powered)
+		return; /* Do nothing if the GPU is off */
+
+	kbase_pm_l2_update_state(kbdev);
+	kbase_pm_shaders_update_state(kbdev);
+
+	/* If the shaders just turned off, re-invoke the L2 state machine, in
+	 * case it was waiting for the shaders to turn off before powering down
+	 * the L2.
+	 */
+	if (prev_shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF &&
+			kbdev->pm.backend.shaders_state == KBASE_SHADERS_OFF_CORESTACK_OFF)
+		kbase_pm_l2_update_state(kbdev);
+
+	if (kbase_pm_is_in_desired_state_nolock(kbdev)) {
 		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
-				kbdev->pm.backend.gpu_in_desired_state,
-				(u32)kbdev->pm.backend.desired_shader_state);
-		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
-				(u32)kbdev->pm.backend.desired_tiler_state);
+				true, kbdev->pm.backend.shaders_avail);
 
-		/* Wake slow-path waiters. Job scheduler does not use this. */
+		kbase_pm_trace_power_state(kbdev);
+
 		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
-
 		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
 	}
-
-	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
-
-	kbdev->shader_ready_bitmap = shader_ready_bitmap;
-	kbdev->shader_transitioning_bitmap = shader_transitioning_bitmap;
-
-	/* The core availability policy is not allowed to keep core group 0
-	 * turned off (unless it was changing the l2 power state) */
-	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
-		kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
-		(prev_l2_available_bitmap == desired_l2_state) &&
-		!(kbase_pm_ca_get_core_mask(kbdev) &
-		kbdev->gpu_props.props.coherency_info.group[0].core_mask))
-		BUG();
-
-	/* The core availability policy is allowed to keep core group 1 off,
-	 * but all jobs specifically targeting CG1 must fail */
-	if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
-		kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
-		!(kbase_pm_ca_get_core_mask(kbdev) &
-		kbdev->gpu_props.props.coherency_info.group[1].core_mask))
-		kbdev->pm.backend.cg1_disabled = true;
-	else
-		kbdev->pm.backend.cg1_disabled = false;
-
-	return cores_are_available;
 }
-KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
 
-/* Timeout for kbase_pm_check_transitions_sync when wait_event_killable has
+static enum hrtimer_restart
+shader_tick_timer_callback(struct hrtimer *timer)
+{
+	struct kbasep_pm_tick_timer_state *stt = container_of(timer,
+			struct kbasep_pm_tick_timer_state, timer);
+	struct kbase_device *kbdev = container_of(stt, struct kbase_device,
+			pm.backend.shader_tick_timer);
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	unsigned long flags;
+	enum hrtimer_restart restart = HRTIMER_NORESTART;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	if (stt->remaining_ticks &&
+			backend->shaders_state == KBASE_SHADERS_WAIT_OFF_CORESTACK_ON) {
+		stt->remaining_ticks--;
+
+		/* If the remaining ticks just changed from 1 to 0, invoke the
+		 * PM state machine to power off the shader cores.
+		 */
+		if (!stt->remaining_ticks && !backend->shaders_desired)
+			kbase_pm_update_state(kbdev);
+	}
+
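+	/* Restart the timer while the hysteresis period is still needed;
+	 * otherwise it simply stops (or is cancelled via the worker).
+	 */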
+	if (stt->needed) {
+		hrtimer_forward_now(timer, stt->configured_interval);
+		restart = HRTIMER_RESTART;
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return restart;
+}
+
+int kbase_pm_state_machine_init(struct kbase_device *kbdev)
+{
+	struct kbasep_pm_tick_timer_state *stt = &kbdev->pm.backend.shader_tick_timer;
+
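+	/* Dedicated high-priority workqueue, used to cancel the shader
+	 * power-off tick timer from process context (see
+	 * shader_poweroff_timer_stop_callback).
+	 */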
+	stt->wq = alloc_workqueue("kbase_pm_shader_poweroff", WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!stt->wq)
+		return -ENOMEM;
+
+	INIT_WORK(&stt->work, shader_poweroff_timer_stop_callback);
+
+	stt->needed = false;
+	hrtimer_init(&stt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	stt->timer.function = shader_tick_timer_callback;
+	stt->configured_interval = HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+	stt->configured_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+
+	return 0;
+}
+
+void kbase_pm_state_machine_term(struct kbase_device *kbdev)
+{
+	hrtimer_cancel(&kbdev->pm.backend.shader_tick_timer.timer);
+	destroy_workqueue(kbdev->pm.backend.shader_tick_timer.wq);
+}
+
+void kbase_pm_reset_start_locked(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	backend->in_reset = true;
+	backend->l2_state = KBASE_L2_RESET_WAIT;
+	backend->shaders_state = KBASE_SHADERS_RESET_WAIT;
+
+	/* We're in a reset, so hwcnt will have been synchronously disabled by
+	 * this function's caller as part of the reset process. We therefore
+	 * know that any call to kbase_hwcnt_context_disable_atomic, if
+	 * required to sync the hwcnt refcount with our internal state, is
+	 * guaranteed to succeed.
+	 */
+	backend->hwcnt_desired = false;
+	if (!backend->hwcnt_disabled) {
+		WARN_ON(!kbase_hwcnt_context_disable_atomic(
+			kbdev->hwcnt_gpu_ctx));
+		backend->hwcnt_disabled = true;
+	}
+
+	shader_poweroff_timer_queue_cancel(kbdev);
+}
+
+void kbase_pm_reset_complete(struct kbase_device *kbdev)
+{
+	struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
+	unsigned long flags;
+
+	WARN_ON(!kbase_reset_gpu_is_active(kbdev));
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	/* The GPU has just been reset, which implicitly flushes the L2 cache,
+	 * so any pending cache flush operation can safely be marked as
+	 * complete and the waiter unblocked.
+	 * No work can be submitted whilst a GPU reset is ongoing.
+	 */
+	kbase_gpu_cache_clean_wait_complete(kbdev);
+	backend->in_reset = false;
+	kbase_pm_update_state(kbdev);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
  * aborted due to a fatal signal. If the time spent waiting has exceeded this
  * threshold then there is most likely a hardware issue. */
 #define PM_TIMEOUT (5*HZ) /* 5s */
 
-void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+static void kbase_pm_timed_out(struct kbase_device *kbdev)
+{
+	dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
+	dev_err(kbdev->dev, "Desired state :\n");
+	dev_err(kbdev->dev, "\tShader=%016llx\n",
+			kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0);
+	dev_err(kbdev->dev, "Current state :\n");
+	dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_READY_HI)),
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(SHADER_READY_LO)));
+	dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_READY_HI)),
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(TILER_READY_LO)));
+	dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_READY_HI)),
+			kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(L2_READY_LO)));
+	dev_err(kbdev->dev, "Cores transitioning :\n");
+	dev_err(kbdev->dev, "\tShader=%08x%08x\n",
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					SHADER_PWRTRANS_HI)),
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					SHADER_PWRTRANS_LO)));
+	dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					TILER_PWRTRANS_HI)),
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					TILER_PWRTRANS_LO)));
+	dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					L2_PWRTRANS_HI)),
+			kbase_reg_read(kbdev, GPU_CONTROL_REG(
+					L2_PWRTRANS_LO)));
+
+	dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
+	if (kbase_prepare_to_reset_gpu(kbdev))
+		kbase_reset_gpu(kbdev);
+}
+
+void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
 {
 	unsigned long flags;
 	unsigned long timeout;
-	bool cores_are_available;
-	int ret;
+	int err;
 
-	/* Force the transition to be checked and reported - the cores may be
-	 * 'available' (for job submission) but not fully powered up. */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-
-	/* Don't need 'cores_are_available', because we don't return anything */
-	CSTD_UNUSED(cores_are_available);
+	kbase_pm_update_state(kbdev);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	timeout = jiffies + PM_TIMEOUT;
 
 	/* Wait for cores */
-	ret = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
-			kbdev->pm.backend.gpu_in_desired_state);
+	err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbase_pm_is_in_desired_state_with_l2_powered(kbdev));
 
-	if (ret < 0 && time_after(jiffies, timeout)) {
-		dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
-		dev_err(kbdev->dev, "Desired state :\n");
-		dev_err(kbdev->dev, "\tShader=%016llx\n",
-				kbdev->pm.backend.desired_shader_state);
-		dev_err(kbdev->dev, "\tTiler =%016llx\n",
-				kbdev->pm.backend.desired_tiler_state);
-		dev_err(kbdev->dev, "Current state :\n");
-		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
-				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(SHADER_READY_HI)),
-				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(SHADER_READY_LO)));
-		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
-				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(TILER_READY_HI)),
-				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(TILER_READY_LO)));
-		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
-				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(L2_READY_HI)),
-				kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(L2_READY_LO)));
-		dev_err(kbdev->dev, "Cores transitioning :\n");
-		dev_err(kbdev->dev, "\tShader=%08x%08x\n",
-				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						SHADER_PWRTRANS_HI)),
-				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						SHADER_PWRTRANS_LO)));
-		dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
-				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						TILER_PWRTRANS_HI)),
-				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						TILER_PWRTRANS_LO)));
-		dev_err(kbdev->dev, "\tL2    =%08x%08x\n",
-				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						L2_PWRTRANS_HI)),
-				kbase_reg_read(kbdev, GPU_CONTROL_REG(
-						L2_PWRTRANS_LO)));
-#if KBASE_GPU_RESET_EN
-		dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
-		if (kbase_prepare_to_reset_gpu(kbdev))
-			kbase_reset_gpu(kbdev);
-#endif /* KBASE_GPU_RESET_EN */
-	}
+	if (err < 0 && time_after(jiffies, timeout))
+		kbase_pm_timed_out(kbdev);
 }
-KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+	unsigned long timeout;
+	int err;
+
+	/* Let the state machine latch the most recent desired state. */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_pm_update_state(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	timeout = jiffies + PM_TIMEOUT;
+
+	/* Wait for cores */
+	err = wait_event_killable(kbdev->pm.backend.gpu_in_desired_state_wait,
+			kbase_pm_is_in_desired_state(kbdev));
+
+	if (err < 0 && time_after(jiffies, timeout))
+		kbase_pm_timed_out(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
 
 void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
 {
@@ -957,7 +1508,6 @@
 
 KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
 
-
 /*
  * pmu layout:
  * 0x0000: PMU TAG (RO) (0xCAFECAFE)
@@ -990,17 +1540,12 @@
 		kbdev->pm.backend.callback_power_resume(kbdev);
 		return;
 	} else if (kbdev->pm.backend.callback_power_on) {
-		kbdev->pm.backend.callback_power_on(kbdev);
-		/* If your platform properly keeps the GPU state you may use the
-		 * return value of the callback_power_on function to
-		 * conditionally reset the GPU on power up. Currently we are
-		 * conservative and always reset the GPU. */
-		reset_required = true;
+		reset_required = kbdev->pm.backend.callback_power_on(kbdev);
 	}
 
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbdev->pm.backend.gpu_powered = true;
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (reset_required) {
 		/* GPU state was lost, reset GPU to ensure it is in a
@@ -1014,8 +1559,14 @@
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 
-	/* Lastly, enable the interrupts */
+	/* Enable the interrupts */
 	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the L2 caches */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbdev->pm.backend.l2_desired = true;
+	kbase_pm_update_state(kbdev);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
@@ -1028,7 +1579,7 @@
 	lockdep_assert_held(&kbdev->pm.lock);
 
 	/* ASSERT that the cores should now be unavailable. No lock needed. */
-	KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+	WARN_ON(kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF);
 
 	kbdev->poweroff_pending = true;
 
@@ -1046,13 +1597,12 @@
 	/* Ensure that any IRQ handlers have finished */
 	kbase_synchronize_irqs(kbdev);
 
-	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
 	if (atomic_read(&kbdev->faults_pending)) {
 		/* Page/bus faults are still being processed. The GPU can not
 		 * be powered off until they have completed */
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-									flags);
+		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		return false;
 	}
 
@@ -1060,7 +1610,7 @@
 
 	/* The GPU power may be turned off from this point */
 	kbdev->pm.backend.gpu_powered = false;
-	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	if (is_suspend && kbdev->pm.backend.callback_power_suspend)
 		kbdev->pm.backend.callback_power_suspend(kbdev);
@@ -1116,36 +1666,72 @@
 	return HRTIMER_NORESTART;
 }
 
-static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+static void kbase_set_jm_quirks(struct kbase_device *kbdev, const u32 prod_id)
 {
-	struct device_node *np = kbdev->dev->of_node;
-	u32 jm_values[4];
-	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
-		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
-	const u32 major = (gpu_id & GPU_ID_VERSION_MAJOR) >>
-		GPU_ID_VERSION_MAJOR_SHIFT;
+	kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
+				GPU_CONTROL_REG(JM_CONFIG));
+	if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) == GPU_ID2_PRODUCT_TMIX) {
+		/* Only for tMIx */
+		u32 coherency_features;
 
-	kbdev->hw_quirks_sc = 0;
+		coherency_features = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(COHERENCY_FEATURES));
 
-	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
-	 * and needed due to MIDGLES-3539. See PRLAM-11035 */
+		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
+		 * documented for tMIx so force correct value here.
+		 */
+		if (coherency_features ==
+				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
+			kbdev->hw_quirks_jm |= (COHERENCY_ACE_LITE |
+					COHERENCY_ACE) <<
+					JM_FORCE_COHERENCY_FEATURES_SHIFT;
+		}
+	}
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_IDVS_GROUP_SIZE)) {
+		int default_idvs_group_size = 0xF;
+		u32 tmp;
+
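+		/* The IDVS group size can be tuned via the optional
+		 * "idvs-group-size" devicetree property; fall back to the
+		 * default if the property is absent or out of range.
+		 */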
+		if (of_property_read_u32(kbdev->dev->of_node,
+					"idvs-group-size", &tmp))
+			tmp = default_idvs_group_size;
+
+		if (tmp > JM_MAX_IDVS_GROUP_SIZE) {
+			dev_err(kbdev->dev,
+				"idvs-group-size of %d is too large. Maximum value is %d",
+				tmp, JM_MAX_IDVS_GROUP_SIZE);
+			tmp = default_idvs_group_size;
+		}
+
+		kbdev->hw_quirks_jm |= tmp << JM_IDVS_GROUP_SIZE_SHIFT;
+	}
+
+#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
+	if (corestack_driver_control)
+		kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
+}
+
+static void kbase_set_sc_quirks(struct kbase_device *kbdev, const u32 prod_id)
+{
+	kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(SHADER_CONFIG));
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE
+	 * (see PRLAM-8443), and due to MIDGLES-3539
+	 * (see PRLAM-11035).
+	 */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
 			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
 		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
 
-	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD.
+	 * See PRLAM-10327.
 	 */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
 		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
 
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
-	/* Enable alternative hardware counter selection if configured. */
-	if (!GPU_ID_IS_NEW_FORMAT(prod_id))
-		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
-#endif
-
-	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	/* Needed due to MIDBASE-2795: ENABLE_TEXGRD_FLAGS.
+	 * See PRLAM-10797.
+	 */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
 		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
 
@@ -1156,110 +1742,69 @@
 			kbdev->hw_quirks_sc |= SC_LS_ALLOW_ATTR_TYPES;
 	}
 
-	if (!kbdev->hw_quirks_sc)
-		kbdev->hw_quirks_sc = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(SHADER_CONFIG));
-
-	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
-			GPU_CONTROL_REG(TILER_CONFIG));
-
-	/* Set tiler clock gate override if required */
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
-		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
-
-	/* Limit the GPU bus bandwidth if the platform needs this. */
-	kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
-			GPU_CONTROL_REG(L2_MMU_CONFIG));
-
-
-	/* Limit read & write ID width for AXI */
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG)) {
-		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS);
-		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_ARID_LIMIT & 0x7) <<
-				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT;
-
-		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
-		kbdev->hw_quirks_mmu |= (DEFAULT_3BIT_AWID_LIMIT & 0x7) <<
-				L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT;
-	} else {
-		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
-		kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
-				L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
-
-		kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
-		kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
-				L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
-	}
-
-	if (kbdev->system_coherency == COHERENCY_ACE) {
-		/* Allow memory configuration disparity to be ignored, we
-		 * optimize the use of shared memory and thus we expect
-		 * some disparity in the memory configuration */
-		kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
-	}
-
-	kbdev->hw_quirks_jm = 0;
-	/* Only for T86x/T88x-based products after r2p0 */
-	if (prod_id >= 0x860 && prod_id <= 0x880 && major >= 2) {
-
-		if (of_property_read_u32_array(np,
-					"jm_config",
-					&jm_values[0],
-					ARRAY_SIZE(jm_values))) {
-			/* Entry not in device tree, use defaults  */
-			jm_values[0] = 0;
-			jm_values[1] = 0;
-			jm_values[2] = 0;
-			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
-		}
-
-		/* Limit throttle limit to 6 bits*/
-		if (jm_values[3] > JM_MAX_JOB_THROTTLE_LIMIT) {
-			dev_dbg(kbdev->dev, "JOB_THROTTLE_LIMIT supplied in device tree is too large. Limiting to MAX (63).");
-			jm_values[3] = JM_MAX_JOB_THROTTLE_LIMIT;
-		}
-
-		/* Aggregate to one integer. */
-		kbdev->hw_quirks_jm |= (jm_values[0] ?
-				JM_TIMESTAMP_OVERRIDE : 0);
-		kbdev->hw_quirks_jm |= (jm_values[1] ?
-				JM_CLOCK_GATE_OVERRIDE : 0);
-		kbdev->hw_quirks_jm |= (jm_values[2] ?
-				JM_JOB_THROTTLE_ENABLE : 0);
-		kbdev->hw_quirks_jm |= (jm_values[3] <<
-				JM_JOB_THROTTLE_LIMIT_SHIFT);
-
-	} else if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
-			   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
-					   GPU_ID2_PRODUCT_TMIX)) {
-		/* Only for tMIx */
-		u32 coherency_features;
-
-		coherency_features = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(COHERENCY_FEATURES));
-
-		/* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
-		 * documented for tMIx so force correct value here.
-		 */
-		if (coherency_features ==
-				COHERENCY_FEATURE_BIT(COHERENCY_ACE)) {
-			kbdev->hw_quirks_jm |=
-				(COHERENCY_ACE_LITE | COHERENCY_ACE) <<
-				JM_FORCE_COHERENCY_FEATURES_SHIFT;
-		}
-	}
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_2968_TTRX_3162))
+		kbdev->hw_quirks_sc |= SC_VAR_ALGORITHM;
 
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_TLS_HASHING))
 		kbdev->hw_quirks_sc |= SC_TLS_HASH_ENABLE;
+}
 
-	if (!kbdev->hw_quirks_jm)
-		kbdev->hw_quirks_jm = kbase_reg_read(kbdev,
-				GPU_CONTROL_REG(JM_CONFIG));
+static void kbase_set_tiler_quirks(struct kbase_device *kbdev)
+{
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TILER_CONFIG));
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+}
 
-#ifdef CONFIG_MALI_CORESTACK
-#define MANUAL_POWER_CONTROL ((u32)(1 << 8))
-	kbdev->hw_quirks_jm |= MANUAL_POWER_CONTROL;
-#endif /* CONFIG_MALI_CORESTACK */
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+	const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+				GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	kbdev->hw_quirks_jm = 0;
+	kbdev->hw_quirks_sc = 0;
+	kbdev->hw_quirks_tiler = 0;
+	kbdev->hw_quirks_mmu = 0;
+
+	if (!of_property_read_u32(np, "quirks_jm",
+				&kbdev->hw_quirks_jm)) {
+		dev_info(kbdev->dev,
+			"Found quirks_jm = [0x%x] in Devicetree\n",
+			kbdev->hw_quirks_jm);
+	} else {
+		kbase_set_jm_quirks(kbdev, prod_id);
+	}
+
+	if (!of_property_read_u32(np, "quirks_sc",
+				&kbdev->hw_quirks_sc)) {
+		dev_info(kbdev->dev,
+			"Found quirks_sc = [0x%x] in Devicetree\n",
+			kbdev->hw_quirks_sc);
+	} else {
+		kbase_set_sc_quirks(kbdev, prod_id);
+	}
+
+	if (!of_property_read_u32(np, "quirks_tiler",
+				&kbdev->hw_quirks_tiler)) {
+		dev_info(kbdev->dev,
+			"Found quirks_tiler = [0x%x] in Devicetree\n",
+			kbdev->hw_quirks_tiler);
+	} else {
+		kbase_set_tiler_quirks(kbdev);
+	}
+
+	if (!of_property_read_u32(np, "quirks_mmu",
+				&kbdev->hw_quirks_mmu)) {
+		dev_info(kbdev->dev,
+			"Found quirks_mmu = [0x%x] in Devicetree\n",
+			kbdev->hw_quirks_mmu);
+	} else {
+		kbase_set_mmu_quirks(kbdev);
+	}
 }
 
 static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
@@ -1272,10 +1817,8 @@
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
 			kbdev->hw_quirks_mmu);
-
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(JM_CONFIG),
 			kbdev->hw_quirks_jm);
-
 }
 
 void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
@@ -1305,13 +1848,26 @@
 	}
 }
 
+static void reenable_protected_mode_hwcnt(struct kbase_device *kbdev)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+	kbdev->protected_mode_hwcnt_desired = true;
+	if (kbdev->protected_mode_hwcnt_disabled) {
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+		kbdev->protected_mode_hwcnt_disabled = false;
+	}
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+}
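For orientation, a minimal sketch (not part of this patch) of the inverse operation
performed when entering protected mode. It assumes the new hwcnt virtualizer also
exposes a blocking kbase_hwcnt_context_disable() and a sleepable calling context;
the real entry path in the job manager backend is more involved.

	static void example_disable_protected_mode_hwcnt(struct kbase_device *kbdev)
	{
		unsigned long irq_flags;

		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
		kbdev->protected_mode_hwcnt_desired = false;
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);

		/* Blocking call, so hwaccess_lock must not be held here */
		kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);

		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
		kbdev->protected_mode_hwcnt_disabled = true;
		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
	}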
+
 static int kbase_pm_do_reset(struct kbase_device *kbdev)
 {
 	struct kbasep_reset_timeout_data rtdata;
 
 	KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
 
-	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev);
+	KBASE_TLSTREAM_JD_GPU_SOFT_RESET(kbdev, kbdev);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
 						GPU_COMMAND_SOFT_RESET);
@@ -1411,7 +1967,6 @@
 {
 	unsigned long irq_flags;
 	int err;
-	bool resume_vinstr = false;
 
 	KBASE_DEBUG_ASSERT(NULL != kbdev);
 	lockdep_assert_held(&kbdev->pm.lock);
@@ -1421,11 +1976,7 @@
 		if (kbdev->pm.backend.callback_power_on)
 			kbdev->pm.backend.callback_power_on(kbdev);
 
-		spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
-								irq_flags);
 		kbdev->pm.backend.gpu_powered = true;
-		spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
-								irq_flags);
 	}
 
 	/* Ensure interrupts are off to begin with, this also clears any
@@ -1438,15 +1989,9 @@
 
 	/* The cores should be made unavailable due to the reset */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
-	if (kbdev->shader_available_bitmap != 0u)
-			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
-						NULL, 0u, (u32)0u);
-	if (kbdev->tiler_available_bitmap != 0u)
-			KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
-						NULL, NULL, 0u, (u32)0u);
-	kbdev->shader_available_bitmap = 0u;
-	kbdev->tiler_available_bitmap = 0u;
-	kbdev->l2_available_bitmap = 0u;
+	if (kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
+		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+				NULL, 0u, (u32)0u);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 
 	/* Soft reset the GPU */
@@ -1457,10 +2002,7 @@
 		err = kbase_pm_do_reset(kbdev);
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
-	if (kbdev->protected_mode)
-		resume_vinstr = true;
 	kbdev->protected_mode = false;
-
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
 
 	if (err)
@@ -1484,9 +2026,7 @@
 	 * false when called from kbase_pm_powerup */
 	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
 						(flags & PM_ENABLE_IRQS)) {
-		/* enable interrupts as the L2 may have to be powered on */
 		kbase_pm_enable_interrupts(kbdev);
-		kbase_pm_request_l2_caches(kbdev);
 
 		/* Re-enable the counters if we need to */
 		spin_lock_irqsave(
@@ -1499,10 +2039,6 @@
 			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
 								irq_flags);
 
-		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
-		kbase_pm_release_l2_caches(kbdev);
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
-
 		kbase_pm_disable_interrupts(kbdev);
 	}
 
@@ -1510,9 +2046,12 @@
 		kbase_pm_enable_interrupts(kbdev);
 
 exit:
-	/* If GPU is leaving protected mode resume vinstr operation. */
-	if (kbdev->vinstr_ctx && resume_vinstr)
-		kbase_vinstr_resume(kbdev->vinstr_ctx);
+	if (!kbdev->pm.backend.protected_entry_transition_override) {
+		/* Re-enable GPU hardware counters if we're resetting from
+		 * protected mode.
+		 */
+		reenable_protected_mode_hwcnt(kbdev);
+	}
 
 	return err;
 }
@@ -1527,9 +2066,8 @@
  * kbase_pm_request_gpu_cycle_counter() or
  * kbase_pm_request_gpu_cycle_counter_l2_is_on() only
  *
- * When this function is called the l2 cache must be on and the l2 cache users
- * count must have been incremented by a call to (
- * kbase_pm_request_l2_caches() or kbase_pm_request_l2_caches_l2_on() )
+ * When this function is called the l2 cache must be on - i.e., the GPU must be
+ * on.
  *
  * @kbdev:     The kbase device structure of the device
  */
@@ -1561,8 +2099,6 @@
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
 								INT_MAX);
 
-	kbase_pm_request_l2_caches(kbdev);
-
 	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
 }
 
@@ -1577,8 +2113,6 @@
 	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
 								INT_MAX);
 
-	kbase_pm_request_l2_caches_l2_is_on(kbdev);
-
 	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
 }
 
@@ -1606,8 +2140,6 @@
 	spin_unlock_irqrestore(
 			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
 									flags);
-
-	kbase_pm_release_l2_caches(kbdev);
 }
 
 void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
index 0d3599a..6ca6a71 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -163,7 +163,7 @@
  * kbase_pm_disable_interrupts - Disable interrupts on the device.
  *
  * This prevents delivery of Power Management interrupts to the CPU so that
- * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * kbase_pm_update_state() will not be called from the IRQ handler
  * until kbase_pm_enable_interrupts() or kbase_pm_clock_on() is called.
  *
  * Interrupts are also disabled after a call to kbase_pm_clock_off().
@@ -206,58 +206,43 @@
  */
 void kbase_pm_reset_done(struct kbase_device *kbdev);
 
-
 /**
- * kbase_pm_check_transitions_nolock - Check if there are any power transitions
- *                                     to make, and if so start them.
+ * kbase_pm_wait_for_desired_state - Wait for the desired power state to be
+ *                                   reached
  *
- * This function will check the desired_xx_state members of
- * struct kbase_pm_device_data and the actual status of the hardware to see if
- * any power transitions can be made at this time to make the hardware state
- * closer to the state desired by the power policy.
- *
- * The return value can be used to check whether all the desired cores are
- * available, and so whether it's worth submitting a job (e.g. from a Power
- * Management IRQ).
- *
- * Note that this still returns true when desired_xx_state has no
- * cores. That is: of the no cores desired, none were *un*available. In
- * this case, the caller may still need to try submitting jobs. This is because
- * the Core Availability Policy might have taken us to an intermediate state
- * where no cores are powered, before powering on more cores (e.g. for core
- * rotation)
- *
- * The caller must hold kbase_device.pm.power_change_lock
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Return:      non-zero when all desired cores are available. That is,
- *              it's worthwhile for the caller to submit a job.
- *              false otherwise
- */
-bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_check_transitions_sync - Synchronous and locking variant of
- *                                   kbase_pm_check_transitions_nolock()
- *
- * On returning, the desired state at the time of the call will have been met.
- *
- * There is nothing to stop the core being switched off by calls to
- * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
- * caller must have already made a call to
- * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ * Wait for the L2 and shader power state machines to reach the states
+ * corresponding to the values of 'l2_desired' and 'shaders_desired'.
  *
  * The usual use-case for this is to ensure cores are 'READY' after performing
  * a GPU Reset.
  *
- * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
- * kbase_device.pm.power_change_lock, because this function will take that
- * lock itself.
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
+ *
+ * NOTE: This may not wait until the correct state is reached if there is a
+ * power off in progress. To correctly wait for the desired state the caller
+ * must ensure that this is not the case by, for example, calling
+ * kbase_pm_wait_for_poweroff_complete().
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
-void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+void kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
+ *
+ * Wait for the L2 to be powered on, and for the L2 and shader state machines to
+ * stabilise by reaching the states corresponding to the values of 'l2_desired'
+ * and 'shaders_desired'.
+ *
+ * kbdev->pm.active_count must be non-zero when calling this function.
+ *
+ * Unlike kbase_pm_update_state(), the caller must not hold hwaccess_lock,
+ * because this function will take that lock itself.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev);
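A minimal usage sketch (not part of this patch), mirroring the sequence already used
by kbase_pm_clock_on() in the backend: mark the L2 as desired, kick the state
machine, then block until the L2 is powered. The enclosing function and error
handling are assumed.

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbdev->pm.backend.l2_desired = true;
	kbase_pm_update_state(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* The caller must already hold an active PM reference */
	kbase_pm_wait_for_l2_powered(kbdev);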
 
 /**
  * kbase_pm_update_cores_state_nolock - Variant of kbase_pm_update_cores_state()
@@ -269,6 +254,25 @@
 void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
 
 /**
+ * kbase_pm_update_state - Update the L2 and shader power state machines
+ * @kbdev: Device pointer
+ */
+void kbase_pm_update_state(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_init - Initialize the state machines, primarily the
+ *                               shader poweroff timer
+ * @kbdev: Device pointer
+ */
+int kbase_pm_state_machine_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_state_machine_term - Clean up the PM state machines' data
+ * @kbdev: Device pointer
+ */
+void kbase_pm_state_machine_term(struct kbase_device *kbdev);
+
+/**
  * kbase_pm_update_cores_state - Update the desired state of shader cores from
  *                               the Power Policy, and begin any power
  *                               transitions.
@@ -283,24 +287,6 @@
 void kbase_pm_update_cores_state(struct kbase_device *kbdev);
 
 /**
- * kbase_pm_cancel_deferred_poweroff - Cancel any pending requests to power off
- *                                     the GPU and/or shader cores.
- *
- * This should be called by any functions which directly power off the GPU.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- */
-void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
-
-/**
- * kbasep_pm_init_core_use_bitmaps - Initialise data tracking the required
- *                                   and used cores.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- */
-void kbasep_pm_init_core_use_bitmaps(struct kbase_device *kbdev);
-
-/**
  * kbasep_pm_metrics_init - Initialize the metrics gathering framework.
  *
  * This must be called before other metric gathering APIs are called.
@@ -577,4 +563,109 @@
 void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask);
 #endif
 
+/**
+ * kbase_pm_reset_start_locked - Signal that GPU reset has started
+ * @kbdev: Device pointer
+ *
+ * Normal power management operation will be suspended until the reset has
+ * completed.
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_reset_start_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_reset_complete - Signal that GPU reset has completed
+ * @kbdev: Device pointer
+ *
+ * Normal power management operation will be resumed. The power manager will
+ * re-evaluate what cores are needed and power on or off as required.
+ */
+void kbase_pm_reset_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_override_enable - Enable the protected mode override
+ * @kbdev: Device pointer
+ *
+ * When the protected mode override is enabled, all shader cores are requested
+ * to power down, and the L2 power state can be controlled by
+ * kbase_pm_protected_l2_override().
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_protected_override_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_override_disable - Disable the protected mode override
+ * @kbdev: Device pointer
+ *
+ * Caller must hold hwaccess_lock.
+ */
+void kbase_pm_protected_override_disable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_l2_override - Control the protected mode L2 override
+ * @kbdev: Device pointer
+ * @override: true to enable the override, false to disable
+ *
+ * When the driver is transitioning in or out of protected mode, the L2 cache is
+ * forced to power off. This can be overridden to force the L2 cache to power
+ * on. This is required to change coherency settings on some GPUs.
+ */
+void kbase_pm_protected_l2_override(struct kbase_device *kbdev, bool override);
+
+/**
+ * kbase_pm_protected_entry_override_enable - Enable the protected mode entry
+ *                                            override
+ * @kbdev: Device pointer
+ *
+ * Initiate a GPU reset and enable the protected mode entry override flag if
+ * the l2_always_on WA is enabled and the platform is fully coherent. If a GPU
+ * reset is already ongoing then the protected mode entry override flag will
+ * not be enabled and the function will have to be called again.
+ *
+ * When the protected mode entry override flag is enabled to power down the L2
+ * via GPU reset, the GPU reset handling behavior changes. For example, the
+ * call to kbase_backend_reset() is skipped, HW counters are not re-enabled and
+ * the L2 isn't powered up again post reset.
+ * This is needed only as a workaround for a HW issue where an explicit power
+ * down of the L2 causes a glitch. For entering protected mode on fully
+ * coherent platforms the L2 must be powered down to switch to IO coherency
+ * mode, so to avoid the glitch a GPU reset is used to power down the L2.
+ * Hence, this function does nothing on systems where the glitch isn't present.
+ *
+ * Caller must hold hwaccess_lock. Should be only called during the transition
+ * to enter protected mode.
+ *
+ * Return: -EAGAIN if a GPU reset was required for the glitch workaround but
+ * was already ongoing, otherwise 0.
+ */
+int kbase_pm_protected_entry_override_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_protected_entry_override_disable - Disable the protected mode entry
+ *                                             override
+ * @kbdev: Device pointer
+ *
+ * This shall be called once the L2 has powered down and the switch to IO
+ * coherency mode has been made. As with kbase_pm_protected_entry_override_enable(),
+ * this function does nothing on systems where the glitch issue isn't present.
+ *
+ * Caller must hold hwaccess_lock. Should be only called during the transition
+ * to enter protected mode.
+ */
+void kbase_pm_protected_entry_override_disable(struct kbase_device *kbdev);
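For orientation only, a simplified sketch (not part of this patch) of how the
protected-mode helpers declared above are expected to be sequenced during entry;
the real code in the job manager backend also handles atom state, coherency
switching and error paths, all elided here.

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_protected_override_enable(kbdev);	/* shaders off, L2 controllable */
	err = kbase_pm_protected_entry_override_enable(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	if (err == -EAGAIN) {
		/* A GPU reset is already in flight; retry the transition later */
	}

	/* ... once the L2 has powered down and IO coherency has been selected ... */

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_pm_protected_entry_override_disable(kbdev);
	kbase_pm_protected_override_disable(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);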
+
+/* If true, the driver should explicitly control corestack power management,
+ * instead of relying on the Power Domain Controller.
+ */
+extern bool corestack_driver_control;
+
+/* If true, disable powering-down of individual cores, and just power-down at
+ * the top-level using platform-specific code.
+ * If false, use the expected behaviour of controlling the individual cores
+ * from within the driver.
+ */
+extern bool platform_power_down_only;
+
 #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h
new file mode 100644
index 0000000..12cb051
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_l2_states.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific Power Manager level 2 cache state definitions.
+ * The function-like macro KBASEP_L2_STATE() must be defined before including
+ * this header file. This header file can be included multiple times in the
+ * same compilation unit with different definitions of KBASEP_L2_STATE().
+ */
+KBASEP_L2_STATE(OFF)
+KBASEP_L2_STATE(PEND_ON)
+KBASEP_L2_STATE(RESTORE_CLOCKS)
+KBASEP_L2_STATE(ON_HWCNT_ENABLE)
+KBASEP_L2_STATE(ON)
+KBASEP_L2_STATE(ON_HWCNT_DISABLE)
+KBASEP_L2_STATE(SLOW_DOWN_CLOCKS)
+KBASEP_L2_STATE(POWER_DOWN)
+KBASEP_L2_STATE(PEND_OFF)
+KBASEP_L2_STATE(RESET_WAIT)
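The usual way such an X-macro header is consumed (an illustration, not part of this
patch) is to define KBASEP_L2_STATE() once per use, for example to generate both an
enum and a matching name table for tracing; the identifiers below are assumptions
made for the sketch.

	enum kbase_l2_core_state {
	#define KBASEP_L2_STATE(n) KBASE_L2_##n,
	#include "mali_kbase_pm_l2_states.h"
	#undef KBASEP_L2_STATE
	};

	static const char *const l2_core_state_names[] = {
	#define KBASEP_L2_STATE(n) #n,
	#include "mali_kbase_pm_l2_states.h"
	#undef KBASEP_L2_STATE
	};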
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
index 6b9b686..ae494b0 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -80,6 +80,7 @@
 	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
 	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
 	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[2] = 0;
 
 	kbdev->pm.backend.metrics.values.time_busy = 0;
 	kbdev->pm.backend.metrics.values.time_idle = 0;
@@ -90,19 +91,15 @@
 	spin_lock_init(&kbdev->pm.backend.metrics.lock);
 
 #ifdef CONFIG_MALI_MIDGARD_DVFS
-	kbdev->pm.backend.metrics.timer_active = true;
 	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
 							HRTIMER_MODE_REL);
 	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
 
-	hrtimer_start(&kbdev->pm.backend.metrics.timer,
-			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
-			HRTIMER_MODE_REL);
+	kbase_pm_metrics_start(kbdev);
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
 
 	return 0;
 }
-
 KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
 
 void kbasep_pm_metrics_term(struct kbase_device *kbdev)
@@ -148,6 +145,8 @@
 			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
 		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
 			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
+		if (kbdev->pm.backend.metrics.active_gl_ctx[2])
+			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
 	} else {
 		kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
 							>> KBASE_PM_TIME_SHIFT);
@@ -221,6 +220,29 @@
 }
 KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
 
+void kbase_pm_metrics_start(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+			HRTIMER_MODE_REL);
+}
+
+void kbase_pm_metrics_stop(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+	kbdev->pm.backend.metrics.timer_active = false;
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+}
+
+
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
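A hypothetical illustration (not part of this patch) of the newly exported
start/stop helpers: platform DVFS glue can pause utilisation sampling around a
clock change. example_set_gpu_freq() is an assumed platform helper and the
function must run in a sleepable context.

	static int example_change_gpu_clock(struct kbase_device *kbdev,
			unsigned long freq)
	{
		int err;

		kbase_pm_metrics_stop(kbdev);	/* cancel the periodic DVFS timer */
		err = example_set_gpu_freq(kbdev, freq);
		kbase_pm_metrics_start(kbdev);	/* resume utilisation sampling */

		return err;
	}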
 
 /**
@@ -238,6 +260,7 @@
 
 	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
 	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
+	kbdev->pm.backend.metrics.active_gl_ctx[2] = 0;
 	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
 	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
 	kbdev->pm.backend.metrics.gpu_active = false;
@@ -260,11 +283,7 @@
 					kbdev->pm.backend.metrics.
 						active_cl_ctx[device_nr] = 1;
 			} else {
-				/* Slot 2 should not be running non-compute
-				 * atoms */
-				if (!WARN_ON(js >= 2))
-					kbdev->pm.backend.metrics.
-						active_gl_ctx[js] = 1;
+				kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
 			}
 			kbdev->pm.backend.metrics.gpu_active = true;
 		}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
index 6dd00a9..3152424 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,218 +27,49 @@
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
 #include <mali_kbase_pm.h>
-#include <mali_kbase_config_defaults.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
-static const struct kbase_pm_policy *const policy_list[] = {
+static const struct kbase_pm_policy *const all_policy_list[] = {
 #ifdef CONFIG_MALI_NO_MALI
 	&kbase_pm_always_on_policy_ops,
-	&kbase_pm_demand_policy_ops,
 	&kbase_pm_coarse_demand_policy_ops,
 #if !MALI_CUSTOMER_RELEASE
-	&kbase_pm_demand_always_powered_policy_ops,
-	&kbase_pm_fast_start_policy_ops,
+	&kbase_pm_always_on_demand_policy_ops,
 #endif
 #else				/* CONFIG_MALI_NO_MALI */
-#if !PLATFORM_POWER_DOWN_ONLY
-	&kbase_pm_demand_policy_ops,
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
 	&kbase_pm_coarse_demand_policy_ops,
-	&kbase_pm_always_on_policy_ops,
 #if !MALI_CUSTOMER_RELEASE
-#if !PLATFORM_POWER_DOWN_ONLY
-	&kbase_pm_demand_always_powered_policy_ops,
-	&kbase_pm_fast_start_policy_ops,
-#endif /* !PLATFORM_POWER_DOWN_ONLY */
+	&kbase_pm_always_on_demand_policy_ops,
 #endif
+	&kbase_pm_always_on_policy_ops
 #endif /* CONFIG_MALI_NO_MALI */
 };
 
-/* The number of policies available in the system.
- * This is derived from the number of functions listed in policy_get_functions.
- */
-#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
-
-
-/* Function IDs for looking up Timeline Trace codes in
- * kbase_pm_change_state_trace_code */
-enum kbase_pm_func_id {
-	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
-	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
-	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
-	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
-	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
-	 * expect to hit it nor tend to hit it very much anyway. We can detect
-	 * whether we need more instrumentation by a difference between
-	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
-
-	/* Must be the last */
-	KBASE_PM_FUNC_ID_COUNT
-};
-
-
-/* State changes during request/unrequest/release-ing cores */
-enum {
-	KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
-	KBASE_PM_CHANGE_STATE_TILER  = (1u << 1),
-
-	/* These two must be last */
-	KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
-						KBASE_PM_CHANGE_STATE_SHADER),
-	KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
-};
-typedef u32 kbase_pm_change_state;
-
-/**
- * kbasep_pm_do_poweroff_cores - Process a poweroff request and power down any
- *                               requested shader cores
- * @kbdev: Device pointer
- */
-static void kbasep_pm_do_poweroff_cores(struct kbase_device *kbdev)
+static void generate_filtered_policy_list(struct kbase_device *kbdev)
 {
-	u64 prev_shader_state = kbdev->pm.backend.desired_shader_state;
-	u64 prev_tiler_state = kbdev->pm.backend.desired_tiler_state;
+	size_t i;
 
-	lockdep_assert_held(&kbdev->hwaccess_lock);
+	for (i = 0; i < ARRAY_SIZE(all_policy_list); ++i) {
+		const struct kbase_pm_policy *pol = all_policy_list[i];
 
-	kbdev->pm.backend.desired_shader_state &=
-			~kbdev->pm.backend.shader_poweroff_pending;
-	kbdev->pm.backend.desired_tiler_state &=
-			~kbdev->pm.backend.tiler_poweroff_pending;
+		BUILD_BUG_ON(ARRAY_SIZE(all_policy_list) >
+			KBASE_PM_MAX_NUM_POLICIES);
+		if (platform_power_down_only &&
+				(pol->flags & KBASE_PM_POLICY_FLAG_DISABLED_WITH_POWER_DOWN_ONLY))
+			continue;
 
-	kbdev->pm.backend.shader_poweroff_pending = 0;
-	kbdev->pm.backend.tiler_poweroff_pending = 0;
-
-	if (prev_shader_state != kbdev->pm.backend.desired_shader_state ||
-			prev_tiler_state !=
-				kbdev->pm.backend.desired_tiler_state ||
-			kbdev->pm.backend.ca_in_transition) {
-		bool cores_are_available;
-
-		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-
-		/* Don't need 'cores_are_available',
-		 * because we don't return anything */
-		CSTD_UNUSED(cores_are_available);
+		kbdev->policy_list[kbdev->policy_count++] = pol;
 	}
 }
 
-static enum hrtimer_restart
-kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
-{
-	struct kbase_device *kbdev;
-	unsigned long flags;
-
-	kbdev = container_of(timer, struct kbase_device,
-						pm.backend.gpu_poweroff_timer);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	/* It is safe for this call to do nothing if the work item is already
-	 * queued. The worker function will read the must up-to-date state of
-	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
-	 *
-	 * If a state change occurs while the worker function is processing,
-	 * this call will succeed as a work item can be requeued once it has
-	 * started processing.
-	 */
-	if (kbdev->pm.backend.gpu_poweroff_pending)
-		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
-					&kbdev->pm.backend.gpu_poweroff_work);
-
-	if (kbdev->pm.backend.shader_poweroff_pending ||
-			kbdev->pm.backend.tiler_poweroff_pending) {
-		kbdev->pm.backend.shader_poweroff_pending_time--;
-
-		KBASE_DEBUG_ASSERT(
-				kbdev->pm.backend.shader_poweroff_pending_time
-									>= 0);
-
-		if (!kbdev->pm.backend.shader_poweroff_pending_time)
-			kbasep_pm_do_poweroff_cores(kbdev);
-	}
-
-	if (kbdev->pm.backend.poweroff_timer_needed) {
-		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
-
-		return HRTIMER_RESTART;
-	}
-
-	kbdev->pm.backend.poweroff_timer_running = false;
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	return HRTIMER_NORESTART;
-}
-
-static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
-{
-	unsigned long flags;
-	struct kbase_device *kbdev;
-	bool do_poweroff = false;
-
-	kbdev = container_of(data, struct kbase_device,
-						pm.backend.gpu_poweroff_work);
-
-	mutex_lock(&kbdev->pm.lock);
-
-	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
-		mutex_unlock(&kbdev->pm.lock);
-		return;
-	}
-
-	kbdev->pm.backend.gpu_poweroff_pending--;
-
-	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
-		mutex_unlock(&kbdev->pm.lock);
-		return;
-	}
-
-	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	/* Only power off the GPU if a request is still pending */
-	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
-		do_poweroff = true;
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	if (do_poweroff) {
-		kbdev->pm.backend.poweroff_timer_needed = false;
-		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
-		kbdev->pm.backend.poweroff_timer_running = false;
-
-		/* Power off the GPU */
-		kbase_pm_do_poweroff(kbdev, false);
-	}
-
-	mutex_unlock(&kbdev->pm.lock);
-}
-
 int kbase_pm_policy_init(struct kbase_device *kbdev)
 {
-	struct workqueue_struct *wq;
+	generate_filtered_policy_list(kbdev);
+	if (kbdev->policy_count == 0)
+		return -EINVAL;
 
-	wq = alloc_workqueue("kbase_pm_do_poweroff",
-			WQ_HIGHPRI | WQ_UNBOUND, 1);
-	if (!wq)
-		return -ENOMEM;
-
-	kbdev->pm.backend.gpu_poweroff_wq = wq;
-	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
-			kbasep_pm_do_gpu_poweroff_wq);
-	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
-			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	kbdev->pm.backend.gpu_poweroff_timer.function =
-			kbasep_pm_do_gpu_poweroff_callback;
-	kbdev->pm.backend.pm_current_policy = policy_list[0];
+	kbdev->pm.backend.pm_current_policy = kbdev->policy_list[0];
 	kbdev->pm.backend.pm_current_policy->init(kbdev);
-	kbdev->pm.gpu_poweroff_time =
-			HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
-	kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
-	kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
 
 	return 0;
 }
@@ -246,29 +77,6 @@
 void kbase_pm_policy_term(struct kbase_device *kbdev)
 {
 	kbdev->pm.backend.pm_current_policy->term(kbdev);
-	destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
-}
-
-void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
-{
-	unsigned long flags;
-
-	lockdep_assert_held(&kbdev->pm.lock);
-
-	kbdev->pm.backend.poweroff_timer_needed = false;
-	hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbdev->pm.backend.poweroff_timer_running = false;
-
-	/* If wq is already running but is held off by pm.lock, make sure it has
-	 * no effect */
-	kbdev->pm.backend.gpu_poweroff_pending = 0;
-
-	kbdev->pm.backend.shader_poweroff_pending = 0;
-	kbdev->pm.backend.tiler_poweroff_pending = 0;
-	kbdev->pm.backend.shader_poweroff_pending_time = 0;
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
 void kbase_pm_update_active(struct kbase_device *kbdev)
@@ -291,35 +99,24 @@
 		kbdev->pm.backend.pm_current_policy->name);
 
 	if (active) {
-		if (backend->gpu_poweroff_pending) {
-			/* Cancel any pending power off request */
-			backend->gpu_poweroff_pending = 0;
-
-			/* If a request was pending then the GPU was still
-			 * powered, so no need to continue */
-			if (!kbdev->poweroff_pending) {
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
-						flags);
-				return;
-			}
-		}
-
-		if (!backend->poweroff_timer_running && !backend->gpu_powered &&
-				(pm->poweroff_gpu_ticks ||
-				pm->poweroff_shader_ticks)) {
-			backend->poweroff_timer_needed = true;
-			backend->poweroff_timer_running = true;
-			hrtimer_start(&backend->gpu_poweroff_timer,
-					pm->gpu_poweroff_time,
-					HRTIMER_MODE_REL);
-		}
-
 		/* Power on the GPU and any cores requested by the policy */
-		if (pm->backend.poweroff_wait_in_progress) {
+		if (!pm->backend.invoke_poweroff_wait_wq_when_l2_off &&
+				pm->backend.poweroff_wait_in_progress) {
 			KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
 			pm->backend.poweron_required = true;
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		} else {
+			/* Cancel the invocation of
+			 * kbase_pm_gpu_poweroff_wait_wq() from the L2 state
+			 * machine. This is safe - if
+			 * invoke_poweroff_wait_wq_when_l2_off is true, then
+			 * the poweroff work hasn't even been queued yet,
+			 * meaning we can go straight to powering on.
+			 */
+			pm->backend.invoke_poweroff_wait_wq_when_l2_off = false;
+			pm->backend.poweroff_wait_in_progress = false;
+			pm->backend.l2_desired = true;
+
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 			kbase_pm_do_poweron(kbdev, false);
 		}
@@ -328,89 +125,21 @@
 		 * when there are contexts active */
 		KBASE_DEBUG_ASSERT(pm->active_count == 0);
 
-		if (backend->shader_poweroff_pending ||
-				backend->tiler_poweroff_pending) {
-			backend->shader_poweroff_pending = 0;
-			backend->tiler_poweroff_pending = 0;
-			backend->shader_poweroff_pending_time = 0;
-		}
-
 		/* Request power off */
 		if (pm->backend.gpu_powered) {
-			if (pm->poweroff_gpu_ticks) {
-				backend->gpu_poweroff_pending =
-						pm->poweroff_gpu_ticks;
-				backend->poweroff_timer_needed = true;
-				if (!backend->poweroff_timer_running) {
-					/* Start timer if not running (eg if
-					 * power policy has been changed from
-					 * always_on to something else). This
-					 * will ensure the GPU is actually
-					 * powered off */
-					backend->poweroff_timer_running
-							= true;
-					hrtimer_start(
-						&backend->gpu_poweroff_timer,
-						pm->gpu_poweroff_time,
-						HRTIMER_MODE_REL);
-				}
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
-						flags);
-			} else {
-				spin_unlock_irqrestore(&kbdev->hwaccess_lock,
-						flags);
+			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
-				/* Power off the GPU immediately */
-				kbase_pm_do_poweroff(kbdev, false);
-			}
+			/* Power off the GPU immediately */
+			kbase_pm_do_poweroff(kbdev, false);
 		} else {
 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		}
 	}
 }
 
-/**
- * get_desired_shader_bitmap - Get the desired shader bitmap, based on the
- *                             current power policy
- *
- * @kbdev: The kbase device structure for the device
- *
- * Queries the current power policy to determine if shader cores will be
- * required in the current state, and apply any HW workarounds.
- *
- * Return: bitmap of desired shader cores
- */
-
-static u64 get_desired_shader_bitmap(struct kbase_device *kbdev)
-{
-	u64 desired_bitmap = 0u;
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if (kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev))
-		desired_bitmap = kbase_pm_ca_get_core_mask(kbdev);
-
-	WARN(!desired_bitmap && kbdev->shader_needed_cnt,
-			"Shader cores are needed but policy '%s' did not make them needed",
-			kbdev->pm.backend.pm_current_policy->name);
-
-	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY)) {
-		/* Unless XAFFINITY is supported, enable core 0 if tiler
-		 * required, regardless of core availability
-		 */
-		if (kbdev->tiler_needed_cnt > 0)
-			desired_bitmap |= 1;
-	}
-
-	return desired_bitmap;
-}
-
 void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
 {
-	u64 desired_bitmap;
-	u64 desired_tiler_bitmap;
-	bool cores_are_available;
-	bool do_poweroff = false;
+	bool shaders_desired;
 
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
@@ -419,105 +148,20 @@
 	if (kbdev->pm.backend.poweroff_wait_in_progress)
 		return;
 
-	if (kbdev->protected_mode_transition && !kbdev->shader_needed_cnt &&
-			!kbdev->tiler_needed_cnt) {
+	if (kbdev->pm.backend.protected_transition_override)
 		/* We are trying to change in/out of protected mode - force all
 		 * cores off so that the L2 powers down */
-		desired_bitmap = 0;
-		desired_tiler_bitmap = 0;
-	} else {
-		desired_bitmap = get_desired_shader_bitmap(kbdev);
+		shaders_desired = false;
+	else
+		shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
 
-		if (kbdev->tiler_needed_cnt > 0)
-			desired_tiler_bitmap = 1;
-		else
-			desired_tiler_bitmap = 0;
-	}
-
-	if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+	if (kbdev->pm.backend.shaders_desired != shaders_desired) {
 		KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
-							(u32)desired_bitmap);
-	/* Are any cores being powered on? */
-	if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
-	    ~kbdev->pm.backend.desired_tiler_state & desired_tiler_bitmap ||
-	    kbdev->pm.backend.ca_in_transition) {
-		/* Check if we are powering off any cores before updating shader
-		 * state */
-		if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
-				kbdev->pm.backend.desired_tiler_state &
-				~desired_tiler_bitmap) {
-			/* Start timer to power off cores */
-			kbdev->pm.backend.shader_poweroff_pending |=
-				(kbdev->pm.backend.desired_shader_state &
-							~desired_bitmap);
-			kbdev->pm.backend.tiler_poweroff_pending |=
-				(kbdev->pm.backend.desired_tiler_state &
-							~desired_tiler_bitmap);
+				(u32)kbdev->pm.backend.shaders_desired);
 
-			if (kbdev->pm.poweroff_shader_ticks &&
-					!kbdev->protected_mode_transition)
-				kbdev->pm.backend.shader_poweroff_pending_time =
-						kbdev->pm.poweroff_shader_ticks;
-			else
-				do_poweroff = true;
-		}
-
-		kbdev->pm.backend.desired_shader_state = desired_bitmap;
-		kbdev->pm.backend.desired_tiler_state = desired_tiler_bitmap;
-
-		/* If any cores are being powered on, transition immediately */
-		cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
-	} else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap ||
-				kbdev->pm.backend.desired_tiler_state &
-				~desired_tiler_bitmap) {
-		/* Start timer to power off cores */
-		kbdev->pm.backend.shader_poweroff_pending |=
-				(kbdev->pm.backend.desired_shader_state &
-							~desired_bitmap);
-		kbdev->pm.backend.tiler_poweroff_pending |=
-				(kbdev->pm.backend.desired_tiler_state &
-							~desired_tiler_bitmap);
-		if (kbdev->pm.poweroff_shader_ticks &&
-				!kbdev->protected_mode_transition)
-			kbdev->pm.backend.shader_poweroff_pending_time =
-					kbdev->pm.poweroff_shader_ticks;
-		else
-			kbasep_pm_do_poweroff_cores(kbdev);
-	} else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
-			desired_tiler_bitmap != 0 &&
-			kbdev->pm.backend.poweroff_timer_needed) {
-		/* If power policy is keeping cores on despite there being no
-		 * active contexts then disable poweroff timer as it isn't
-		 * required.
-		 * Only reset poweroff_timer_needed if we're not in the middle
-		 * of the power off callback */
-		kbdev->pm.backend.poweroff_timer_needed = false;
+		kbdev->pm.backend.shaders_desired = shaders_desired;
+		kbase_pm_update_state(kbdev);
 	}
-
-	/* Ensure timer does not power off wanted cores and make sure to power
-	 * off unwanted cores */
-	if (kbdev->pm.backend.shader_poweroff_pending ||
-			kbdev->pm.backend.tiler_poweroff_pending) {
-		kbdev->pm.backend.shader_poweroff_pending &=
-				~(kbdev->pm.backend.desired_shader_state &
-								desired_bitmap);
-		kbdev->pm.backend.tiler_poweroff_pending &=
-				~(kbdev->pm.backend.desired_tiler_state &
-				desired_tiler_bitmap);
-
-		if (!kbdev->pm.backend.shader_poweroff_pending &&
-				!kbdev->pm.backend.tiler_poweroff_pending)
-			kbdev->pm.backend.shader_poweroff_pending_time = 0;
-	}
-
-	/* Shader poweroff is deferred to the end of the function, to eliminate
-	 * issues caused by the core availability policy recursing into this
-	 * function */
-	if (do_poweroff)
-		kbasep_pm_do_poweroff_cores(kbdev);
-
-	/* Don't need 'cores_are_available', because we don't return anything */
-	CSTD_UNUSED(cores_are_available);
 }
 
 void kbase_pm_update_cores_state(struct kbase_device *kbdev)
@@ -531,14 +175,14 @@
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 }
 
-int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+int kbase_pm_list_policies(struct kbase_device *kbdev,
+	const struct kbase_pm_policy * const **list)
 {
-	if (!list)
-		return POLICY_COUNT;
+	WARN_ON(kbdev->policy_count == 0);
+	if (list)
+		*list = kbdev->policy_list;
 
-	*list = policy_list;
-
-	return POLICY_COUNT;
+	return kbdev->policy_count;
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
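A short usage sketch (not part of this patch) of the new per-device signature, for
example from a sysfs show handler; output handling is simplified to a dev_info()
print.

	const struct kbase_pm_policy *const *policies;
	int i, count;

	count = kbase_pm_list_policies(kbdev, &policies);
	for (i = 0; i < count; i++)
		dev_info(kbdev->dev, "PM policy %d: %s\n", i, policies[i]->name);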
@@ -607,171 +251,3 @@
 }
 
 KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
-
-void kbase_pm_request_cores(struct kbase_device *kbdev,
-				bool tiler_required, bool shader_required)
-{
-	kbase_pm_change_state change_gpu_state = 0u;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if (shader_required) {
-		int cnt = ++kbdev->shader_needed_cnt;
-
-		if (cnt == 1)
-			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
-
-		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt != 0);
-	}
-
-	if (tiler_required) {
-		int cnt = ++kbdev->tiler_needed_cnt;
-
-		if (cnt == 1)
-			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
-
-		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
-	}
-
-	if (change_gpu_state) {
-		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
-				NULL, 0u, kbdev->shader_needed_cnt);
-		KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_TILER_NEEDED, NULL,
-				NULL, 0u, kbdev->tiler_needed_cnt);
-
-		kbase_pm_update_cores_state_nolock(kbdev);
-	}
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
-
-void kbase_pm_release_cores(struct kbase_device *kbdev,
-				bool tiler_required, bool shader_required)
-{
-	kbase_pm_change_state change_gpu_state = 0u;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if (shader_required) {
-		int cnt;
-
-		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt > 0);
-
-		cnt = --kbdev->shader_needed_cnt;
-
-		if (0 == cnt) {
-			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
-		}
-	}
-
-	if (tiler_required) {
-		int cnt;
-
-		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
-
-		cnt = --kbdev->tiler_needed_cnt;
-
-		if (0 == cnt)
-			change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
-	}
-
-	if (change_gpu_state) {
-		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_NEEDED, NULL,
-				NULL, 0u, kbdev->shader_needed_cnt);
-		KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_TILER_NEEDED, NULL,
-				NULL, 0u, kbdev->tiler_needed_cnt);
-
-		kbase_pm_update_cores_state_nolock(kbdev);
-	}
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
-
-void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
-		bool tiler_required, bool shader_required)
-{
-	unsigned long flags;
-
-	kbase_pm_wait_for_poweroff_complete(kbdev);
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_pm_request_cores(kbdev, tiler_required, shader_required);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	kbase_pm_check_transitions_sync(kbdev);
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
-
-static void kbase_pm_l2_caches_ref(struct kbase_device *kbdev)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	kbdev->l2_users_count++;
-
-	KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
-
-	/* Check for the required L2 transitions.
-	 * Caller would block here for the L2 caches of all core groups to be
-	 * powered on, so need to inform the Hw to power up all the L2 caches.
-	 * Can't rely on the l2_users_count value being non-zero previously to
-	 * avoid checking for the transition, as the count could be non-zero
-	 * even if not all the instances of L2 cache are powered up since
-	 * currently the power status of L2 is not tracked separately for each
-	 * core group. Also if the GPU is reset while the L2 is on, L2 will be
-	 * off but the count will be non-zero.
-	 */
-	kbase_pm_check_transitions_nolock(kbdev);
-}
-
-void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-	/* Take the reference on l2_users_count and check core transitions.
-	 */
-	kbase_pm_l2_caches_ref(kbdev);
-
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
-	wait_event(kbdev->pm.backend.l2_powered_wait,
-					kbdev->pm.backend.l2_powered == 1);
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
-
-void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev)
-{
-	/* Take the reference on l2_users_count and check core transitions.
-	 */
-	kbase_pm_l2_caches_ref(kbdev);
-}
-
-void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	kbdev->l2_users_count++;
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
-
-void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
-
-	--kbdev->l2_users_count;
-
-	if (!kbdev->l2_users_count)
-		kbase_pm_check_transitions_nolock(kbdev);
-}
-
-KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
index 2e86929..966fce7 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -64,61 +64,10 @@
  */
 void kbase_pm_update_cores(struct kbase_device *kbdev);
 
-
-enum kbase_pm_cores_ready {
-	KBASE_CORES_NOT_READY = 0,
-	KBASE_NEW_AFFINITY = 1,
-	KBASE_CORES_READY = 2
-};
-
-
-/**
- * kbase_pm_request_cores - Request the desired cores to be powered up.
- * @kbdev:           Kbase device
- * @tiler_required:  true if tiler is required
- * @shader_required: true if shaders are required
- *
- * Called by the scheduler to request power to the desired cores.
- *
- * There is no guarantee that the HW will be powered up on return. Use
- * kbase_pm_cores_requested()/kbase_pm_cores_ready() to verify that cores are
- * now powered, or instead call kbase_pm_request_cores_sync().
- */
-void kbase_pm_request_cores(struct kbase_device *kbdev, bool tiler_required,
-		bool shader_required);
-
-/**
- * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
- * @kbdev:           Kbase device
- * @tiler_required:  true if tiler is required
- * @shader_required: true if shaders are required
- *
- * When this function returns, the @shader_cores will be in the READY state.
- *
- * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
- * work of ensuring the requested cores will remain powered until a matching
- * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
- * is made.
- */
-void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
-		bool tiler_required, bool shader_required);
-
-/**
- * kbase_pm_release_cores - Request the desired cores to be powered down.
- * @kbdev:           Kbase device
- * @tiler_required:  true if tiler is required
- * @shader_required: true if shaders are required
- *
- * Called by the scheduler to release its power reference on the desired cores.
- */
-void kbase_pm_release_cores(struct kbase_device *kbdev, bool tiler_required,
-		bool shader_required);
-
 /**
  * kbase_pm_cores_requested - Check that a power request has been locked into
  *                            the HW.
  * @kbdev:           Kbase device
- * @tiler_required:  true if tiler is required
  * @shader_required: true if shaders are required
  *
  * Called by the scheduler to check if a power on request has been locked into
@@ -136,112 +85,25 @@
  *         request is still pending.
  */
 static inline bool kbase_pm_cores_requested(struct kbase_device *kbdev,
-		bool tiler_required, bool shader_required)
+		bool shader_required)
 {
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
-	if ((shader_required && !kbdev->shader_available_bitmap) ||
-			(tiler_required && !kbdev->tiler_available_bitmap))
+	/* If the L2 & tiler are not on or pending, then the tiler is not yet
+	 * available, and shaders are definitely not powered.
+	 */
+	if (kbdev->pm.backend.l2_state != KBASE_L2_PEND_ON &&
+			kbdev->pm.backend.l2_state != KBASE_L2_ON &&
+			kbdev->pm.backend.l2_state != KBASE_L2_ON_HWCNT_ENABLE)
+		return false;
+
+	if (shader_required &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_PEND_ON_CORESTACK_ON &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON &&
+			kbdev->pm.backend.shaders_state != KBASE_SHADERS_ON_CORESTACK_ON_RECHECK)
 		return false;
 
 	return true;
 }
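A minimal sketch (not part of this patch) of how a submission path might use the
helper above; the surrounding scheduler context is elided and 'can_submit' is a
hypothetical local.

	bool can_submit;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Let the active policy update shaders_desired first */
	kbase_pm_update_cores_state_nolock(kbdev);
	can_submit = kbase_pm_cores_requested(kbdev, true);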
 
-/**
- * kbase_pm_cores_ready -  Check that the required cores have been powered on by
- *                         the HW.
- * @kbdev:           Kbase device
- * @tiler_required:  true if tiler is required
- * @shader_required: true if shaders are required
- *
- * Called by the scheduler to check if cores are ready.
- *
- * Note that the caller should ensure that they have first requested cores
- * before calling this function.
- *
- * Caller must hold the hwaccess_lock.
- *
- * Return: true if the cores are ready.
- */
-static inline bool kbase_pm_cores_ready(struct kbase_device *kbdev,
-		bool tiler_required, bool shader_required)
-{
-	lockdep_assert_held(&kbdev->hwaccess_lock);
-
-	if ((shader_required && !kbdev->shader_ready_bitmap) ||
-			(tiler_required && !kbdev->tiler_available_bitmap))
-		return false;
-
-	return true;
-}
-
-/**
- * kbase_pm_request_l2_caches - Request l2 caches
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Request the use of l2 caches for all core groups, power up, wait and prevent
- * the power manager from powering down the l2 caches.
- *
- * This tells the power management that the caches should be powered up, and
- * they should remain powered, irrespective of the usage of shader cores. This
- * does not return until the l2 caches are powered up.
- *
- * The caller must call kbase_pm_release_l2_caches() when they are finished
- * to allow normal power management of the l2 caches to resume.
- *
- * This should only be used when power management is active.
- */
-void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_request_l2_caches_nolock - Request l2 caches, nolock version
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Request the use of l2 caches for all core groups and power up without
- * waiting for power manager to actually power up the cores. This is done
- * because the call to this function is done from within the atomic context
- * and the actual l2 caches being powered up is checked at a later stage.
- * The reference taken on l2 caches is removed when the protected mode atom
- * is released so there is no need to make a call to a matching
- * release_l2_caches().
- *
- * This function is used specifically for the case when l2 caches are
- * to be powered up as part of the sequence for entering protected mode.
- *
- * This should only be used when power management is active.
- */
-void kbase_pm_request_l2_caches_nolock(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Increment the count of l2 users but do not attempt to power on the l2
- *
- * It is the callers responsibility to ensure that the l2 is already powered up
- * and to eventually call kbase_pm_release_l2_caches()
- */
-void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
-
-/**
- * kbase_pm_release_l2_caches - Release l2 caches
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
- * Release the use of l2 caches for all core groups and allow the power manager
- * to power them down when necessary.
- *
- * This tells the power management that the caches can be powered down if
- * necessary, with respect to the usage of shader cores.
- *
- * The caller must have called kbase_pm_request_l2_caches() prior to a call
- * to this.
- *
- * This should only be used when power management is active.
- */
-void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
-
 #endif /* _KBASE_PM_POLICY_H_ */
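The reworked kbase_pm_cores_requested() above drops the availability bitmaps and instead inspects the backend L2 and shader state machines directly. A minimal caller sketch (illustrative only, not taken from the patch), following the locking rule stated in the kernel-doc, i.e. hwaccess_lock held:

	unsigned long flags;
	bool locked_in;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	locked_in = kbase_pm_cores_requested(kbdev, true /* shader_required */);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	if (!locked_in) {
		/* The power-on request has not been latched by the state
		 * machine yet, so the caller would back off and retry.
		 */
	}
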
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h
new file mode 100644
index 0000000..3f89eb5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_shader_states.h
@@ -0,0 +1,42 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Backend-specific Power Manager shader core state definitions.
+ * The function-like macro KBASEP_SHADER_STATE() must be defined before
+ * including this header file. This header file can be included multiple
+ * times in the same compilation unit with different definitions of
+ * KBASEP_SHADER_STATE().
+ */
+KBASEP_SHADER_STATE(OFF_CORESTACK_OFF)
+KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_ON)
+KBASEP_SHADER_STATE(PEND_ON_CORESTACK_ON)
+KBASEP_SHADER_STATE(ON_CORESTACK_ON)
+KBASEP_SHADER_STATE(ON_CORESTACK_ON_RECHECK)
+KBASEP_SHADER_STATE(WAIT_OFF_CORESTACK_ON)
+KBASEP_SHADER_STATE(WAIT_FINISHED_CORESTACK_ON)
+KBASEP_SHADER_STATE(L2_FLUSHING_CORESTACK_ON)
+KBASEP_SHADER_STATE(READY_OFF_CORESTACK_ON)
+KBASEP_SHADER_STATE(PEND_OFF_CORESTACK_ON)
+KBASEP_SHADER_STATE(OFF_CORESTACK_PEND_OFF)
+KBASEP_SHADER_STATE(OFF_CORESTACK_OFF_TIMER_PEND_OFF)
+KBASEP_SHADER_STATE(RESET_WAIT)
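
The new file is an x-macro list: every consumer defines KBASEP_SHADER_STATE() before including it, so one list can generate both the state enum and, for debugging, a parallel table of state names. A sketch of the usual consumption pattern; the enum and array identifiers below are illustrative, though the KBASE_SHADERS_ prefix matches the values used by kbase_pm_cores_requested() earlier in this patch:

/* Expands to KBASE_SHADERS_OFF_CORESTACK_OFF, KBASE_SHADERS_OFF_CORESTACK_PEND_ON, ... */
#define KBASEP_SHADER_STATE(n) KBASE_SHADERS_ ## n,
enum kbase_shader_core_state {
#include "mali_kbase_pm_shader_states.h"
};
#undef KBASEP_SHADER_STATE

/* Expands to "OFF_CORESTACK_OFF", "OFF_CORESTACK_PEND_ON", ... for logging */
#define KBASEP_SHADER_STATE(n) #n,
static const char *const shader_state_names[] = {
#include "mali_kbase_pm_shader_states.h"
};
#undef KBASEP_SHADER_STATE
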
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
index 5e1b761..0e17dc0 100644
--- a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2016,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2016,2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,28 +32,35 @@
 
 	kbase_pm_request_gpu_cycle_counter(kbdev);
 
-	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
-	 * correctly */
-	do {
-		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
-		*cycle_counter = kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(CYCLE_COUNT_LO));
-		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI));
+	if (cycle_counter) {
+		/* Read hi, lo, hi to ensure a coherent u64 */
+		do {
+			hi1 = kbase_reg_read(kbdev,
+					     GPU_CONTROL_REG(CYCLE_COUNT_HI));
+			*cycle_counter = kbase_reg_read(kbdev,
+					     GPU_CONTROL_REG(CYCLE_COUNT_LO));
+			hi2 = kbase_reg_read(kbdev,
+					     GPU_CONTROL_REG(CYCLE_COUNT_HI));
+		} while (hi1 != hi2);
 		*cycle_counter |= (((u64) hi1) << 32);
-	} while (hi1 != hi2);
+	}
 
-	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
-	 * correctly */
-	do {
-		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
-		*system_time = kbase_reg_read(kbdev,
-					GPU_CONTROL_REG(TIMESTAMP_LO));
-		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI));
+	if (system_time) {
+		/* Read hi, lo, hi to ensure a coherent u64 */
+		do {
+			hi1 = kbase_reg_read(kbdev,
+					     GPU_CONTROL_REG(TIMESTAMP_HI));
+			*system_time = kbase_reg_read(kbdev,
+					     GPU_CONTROL_REG(TIMESTAMP_LO));
+			hi2 = kbase_reg_read(kbdev,
+					     GPU_CONTROL_REG(TIMESTAMP_HI));
+		} while (hi1 != hi2);
 		*system_time |= (((u64) hi1) << 32);
-	} while (hi1 != hi2);
+	}
 
 	/* Record the CPU's idea of current time */
-	getrawmonotonic(ts);
+	if (ts != NULL)
+		getrawmonotonic(ts);
 
 	kbase_pm_release_gpu_cycle_counter(kbdev);
 }
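
The hunk above keeps the established hi/lo/hi sequence but only performs each read when the corresponding output pointer is non-NULL. The sequence itself, lifted into a standalone sketch (hypothetical helper, not part of the patch): a 64-bit counter split across two 32-bit registers is read without tearing by re-reading HI until it is stable.

static u64 read_split_counter64(struct kbase_device *kbdev,
				u32 lo_offset, u32 hi_offset)
{
	u32 hi1, hi2, lo;

	do {
		hi1 = kbase_reg_read(kbdev, hi_offset);
		lo = kbase_reg_read(kbdev, lo_offset);
		hi2 = kbase_reg_read(kbdev, hi_offset);
		/* HI changed: LO wrapped between the reads, so retry */
	} while (hi1 != hi2);

	return (((u64)hi1) << 32) | lo;
}
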
diff --git a/drivers/gpu/arm/midgard/build.bp b/drivers/gpu/arm/midgard/build.bp
index ada6920..5e6fdfc 100644
--- a/drivers/gpu/arm/midgard/build.bp
+++ b/drivers/gpu/arm/midgard/build.bp
@@ -3,7 +3,7 @@
  * ----------------------------------------------------------------------------
  * This confidential and proprietary software may be used only as authorized
  * by a licensing agreement from ARM Limited.
- *      (C) COPYRIGHT 2017-2018 ARM Limited, ALL RIGHTS RESERVED
+ *      (C) COPYRIGHT 2017-2019 ARM Limited, ALL RIGHTS RESERVED
  * The entire notice above must be reproduced on all authorized copies and
  * copies may only be made to the extent permitted by a licensing agreement
  * from ARM Limited.
@@ -19,8 +19,8 @@
     no_mali: {
         kbuild_options: ["CONFIG_MALI_NO_MALI=y"],
     },
-    mali_corestack: {
-        kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
+    mali_real_hw: {
+        kbuild_options: ["CONFIG_MALI_REAL_HW=y"],
     },
     mali_devfreq: {
         kbuild_options: ["CONFIG_MALI_DEVFREQ=y"],
@@ -31,12 +31,18 @@
     mali_debug: {
         kbuild_options: ["CONFIG_MALI_DEBUG=y"],
     },
-    mali_fpga_bus_logger: {
-        kbuild_options: ["CONFIG_MALI_FPGA_BUS_LOGGER=y"],
+    buslog: {
+        kbuild_options: ["CONFIG_MALI_BUSLOG=y"],
     },
     cinstr_job_dump: {
         kbuild_options: ["CONFIG_MALI_JOB_DUMP=y"],
     },
+    cinstr_vector_dump: {
+        kbuild_options: ["CONFIG_MALI_VECTOR_DUMP=y"],
+    },
+    cinstr_gwt: {
+        kbuild_options: ["CONFIG_MALI_CINSTR_GWT=y"],
+    },
     mali_gator_support: {
         kbuild_options: ["CONFIG_MALI_GATOR_SUPPORT=y"],
     },
@@ -46,6 +52,15 @@
     mali_pwrsoft_765: {
         kbuild_options: ["CONFIG_MALI_PWRSOFT_765=y"],
     },
+    mali_memory_fully_backed: {
+        kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
+    },
+    mali_dma_buf_map_on_demand: {
+        kbuild_options: ["CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y"],
+    },
+    mali_dma_buf_legacy_compat: {
+        kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
+    },
     kbuild_options: [
         "MALI_UNIT_TEST={{.unit_test_code}}",
         "MALI_CUSTOMER_RELEASE={{.release}}",
@@ -78,8 +93,18 @@
         "CONFIG_MALI_MIDGARD=m",
         "CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
         "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
-        "MALI_MOCK_TEST={{.mali_mock_test}}",
     ],
+    buslog: {
+        extra_symbols: [
+            "bus_logger",
+        ],
+    },
+    mali_corestack: {
+        kbuild_options: ["CONFIG_MALI_CORESTACK=y"],
+    },
+    mali_platform_power_down_only: {
+        kbuild_options: ["CONFIG_MALI_PLATFORM_POWER_DOWN_ONLY=y"],
+    },
     mali_error_inject: {
         kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
     },
@@ -92,8 +117,11 @@
     mali_2mb_alloc: {
         kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
     },
-    mali_mock_test: {
-        srcs: ["tests/internal/src/mock/mali_kbase_pm_driver_mock.c"],
+    mali_hw_errata_1485982_not_affected: {
+        kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y"],
+    },
+    mali_hw_errata_1485982_use_clock_alternative: {
+        kbuild_options: ["CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE=y"],
     },
     gpu_has_csf: {
         srcs: [
@@ -104,5 +132,3 @@
     },
     defaults: ["mali_kbase_shared_config_defaults"],
 }
-
-optional_subdirs = ["tests"]
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
index 15566f6..9b75f0d 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -38,16 +38,18 @@
 #endif
 
 #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
-#define KBASE_IPA_G71_MODEL_NAME      "mali-g71-power-model"
-#define KBASE_IPA_G72_MODEL_NAME      "mali-g72-power-model"
-#define KBASE_IPA_TNOX_MODEL_NAME     "mali-tnox-power-model"
-#define KBASE_IPA_TGOX_R1_MODEL_NAME  "mali-tgox_r1-power-model"
 
-static struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
+static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
 	&kbase_simple_ipa_model_ops,
 	&kbase_g71_ipa_model_ops,
 	&kbase_g72_ipa_model_ops,
-	&kbase_tnox_ipa_model_ops
+	&kbase_g76_ipa_model_ops,
+	&kbase_g52_ipa_model_ops,
+	&kbase_g52_r1_ipa_model_ops,
+	&kbase_g51_ipa_model_ops,
+	&kbase_g77_ipa_model_ops,
+	&kbase_tnax_ipa_model_ops,
+	&kbase_tbex_ipa_model_ops
 };
 
 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
@@ -68,13 +70,13 @@
 	return err;
 }
 
-static struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
 							    const char *name)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
-		struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
+		const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
 
 		if (!strcmp(ops->name, name))
 			return ops;
@@ -84,34 +86,40 @@
 
 	return NULL;
 }
+KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
 
 const char *kbase_ipa_model_name_from_id(u32 gpu_id)
 {
 	const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
 			GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
-	if (GPU_ID_IS_NEW_FORMAT(prod_id)) {
-		switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
-		case GPU_ID2_PRODUCT_TMIX:
-			return KBASE_IPA_G71_MODEL_NAME;
-		case GPU_ID2_PRODUCT_THEX:
-			return KBASE_IPA_G72_MODEL_NAME;
-		case GPU_ID2_PRODUCT_TNOX:
-			return KBASE_IPA_TNOX_MODEL_NAME;
-		case GPU_ID2_PRODUCT_TGOX:
-			if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
-					(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
-				/* TGOX r0 shares a power model with TNOX */
-				return KBASE_IPA_TNOX_MODEL_NAME;
-			else
-				return KBASE_IPA_TGOX_R1_MODEL_NAME;
-		default:
-			return KBASE_IPA_FALLBACK_MODEL_NAME;
-		}
+	switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+	case GPU_ID2_PRODUCT_TMIX:
+		return "mali-g71-power-model";
+	case GPU_ID2_PRODUCT_THEX:
+		return "mali-g72-power-model";
+	case GPU_ID2_PRODUCT_TNOX:
+		return "mali-g76-power-model";
+	case GPU_ID2_PRODUCT_TSIX:
+		return "mali-g51-power-model";
+	case GPU_ID2_PRODUCT_TGOX:
+		if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
+				(0 << GPU_ID2_VERSION_MAJOR_SHIFT))
+			/* g52 aliased to g76 power-model's ops */
+			return "mali-g52-power-model";
+		else
+			return "mali-g52_r1-power-model";
+	case GPU_ID2_PRODUCT_TNAX:
+		return "mali-tnax-power-model";
+	case GPU_ID2_PRODUCT_TTRX:
+		return "mali-g77-power-model";
+	case GPU_ID2_PRODUCT_TBEX:
+		return "mali-tbex-power-model";
+	default:
+		return KBASE_IPA_FALLBACK_MODEL_NAME;
 	}
-
-	return KBASE_IPA_FALLBACK_MODEL_NAME;
 }
+KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
 
 static struct device_node *get_model_dt_node(struct kbase_ipa_model *model)
 {
@@ -244,7 +252,7 @@
 KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
 
 struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
-					     struct kbase_ipa_model_ops *ops)
+					     const struct kbase_ipa_model_ops *ops)
 {
 	struct kbase_ipa_model *model;
 	int err;
@@ -298,7 +306,7 @@
 {
 
 	const char *model_name;
-	struct kbase_ipa_model_ops *ops;
+	const struct kbase_ipa_model_ops *ops;
 	struct kbase_ipa_model *default_model = NULL;
 	int err;
 
@@ -371,6 +379,8 @@
 	mutex_lock(&kbdev->ipa.lock);
 	kbase_ipa_term_locked(kbdev);
 	mutex_unlock(&kbdev->ipa.lock);
+
+	mutex_destroy(&kbdev->ipa.lock);
 }
 KBASE_EXPORT_TEST_API(kbase_ipa_term);
 
@@ -457,7 +467,8 @@
 
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
-	if (kbdev->ipa_protection_mode_switched)
+	if (kbdev->ipa_protection_mode_switched ||
+			kbdev->ipa.force_fallback_model)
 		model = kbdev->ipa.fallback_model;
 	else
 		model = kbdev->ipa.configured_model;
@@ -516,6 +527,9 @@
 	struct kbase_device *kbdev = kbase_find_device(-1);
 #endif
 
+	if (!kbdev)
+		return 0ul;
+
 	mutex_lock(&kbdev->ipa.lock);
 
 	model = get_current_model(kbdev);
@@ -551,6 +565,9 @@
 	struct kbase_device *kbdev = kbase_find_device(-1);
 #endif
 
+	if (!kbdev)
+		return 0ul;
+
 	mutex_lock(&kbdev->ipa.lock);
 
 	model = kbdev->ipa.fallback_model;
@@ -626,6 +643,9 @@
 	int ret;
 	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
 
+	if (!kbdev)
+		return -ENODEV;
+
 	mutex_lock(&kbdev->ipa.lock);
 	ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
 	mutex_unlock(&kbdev->ipa.lock);
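
With the GPU_ID_IS_NEW_FORMAT branch removed and the per-GPU name macros folded into the switch, model selection is now a plain name lookup followed by an ops lookup. A sketch of how the pieces fit together (the surrounding init code is not part of this hunk, so gpu_id and the error handling below are illustrative):

	const char *model_name = kbase_ipa_model_name_from_id(gpu_id);
	const struct kbase_ipa_model_ops *ops =
		kbase_ipa_model_ops_find(kbdev, model_name);
	struct kbase_ipa_model *model;

	/* No counter-based model registered for this GPU: fall back */
	if (!ops)
		ops = kbase_ipa_model_ops_find(kbdev,
				KBASE_IPA_FALLBACK_MODEL_NAME);

	model = kbase_ipa_init_model(kbdev, ops);
	if (!model)
		return -EINVAL;
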
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
index 4656ded..92aace9 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -40,7 +40,7 @@
 struct kbase_ipa_model {
 	struct kbase_device *kbdev;
 	void *model_data;
-	struct kbase_ipa_model_ops *ops;
+	const struct kbase_ipa_model_ops *ops;
 	struct list_head params;
 	bool missing_dt_node_warning;
 };
@@ -154,6 +154,25 @@
 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model);
 
 /**
+ * kbase_ipa_model_ops_find - Lookup an IPA model using its name
+ * @kbdev:      pointer to kbase device
+ * @name:       name of model to lookup
+ *
+ * Return: Pointer to model's 'ops' structure, or NULL if the lookup failed.
+ */
+const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
+							   const char *name);
+
+/**
+ * kbase_ipa_model_name_from_id - Find the best model for a given GPU ID
+ * @gpu_id:     GPU ID of GPU the model will be used for
+ *
+ * Return: The name of the appropriate counter-based model, or the name of the
+ *         fallback model if no counter model exists.
+ */
+const char *kbase_ipa_model_name_from_id(u32 gpu_id);
+
+/**
  * kbase_ipa_init_model - Initialize the particular IPA model
  * @kbdev:      pointer to kbase device
  * @ops:        pointer to object containing model specific methods.
@@ -164,7 +183,7 @@
  * Return: pointer to kbase_ipa_model on success, NULL on error
  */
 struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
-					     struct kbase_ipa_model_ops *ops);
+					     const struct kbase_ipa_model_ops *ops);
 /**
  * kbase_ipa_term_model - Terminate the particular IPA model
  * @model:      pointer to the IPA model object, already initialized
@@ -183,10 +202,15 @@
  */
 void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
 
-extern struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
-extern struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
-extern struct kbase_ipa_model_ops kbase_tnox_ipa_model_ops;
-extern struct kbase_ipa_model_ops kbase_tgox_r1_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_tnax_ipa_model_ops;
+extern const struct kbase_ipa_model_ops kbase_tbex_ipa_model_ops;
 
 /**
  * kbase_get_real_power() - get the real power consumption of the GPU
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
index 814bb4c..30a3b7d 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -141,6 +141,7 @@
 }
 
 static const struct file_operations fops_string = {
+	.owner = THIS_MODULE,
 	.read = param_string_get,
 	.write = param_string_set,
 	.open = simple_open,
@@ -188,6 +189,33 @@
 	}
 }
 
+static int force_fallback_model_get(void *data, u64 *val)
+{
+	struct kbase_device *kbdev = data;
+
+	mutex_lock(&kbdev->ipa.lock);
+	*val = kbdev->ipa.force_fallback_model;
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return 0;
+}
+
+static int force_fallback_model_set(void *data, u64 val)
+{
+	struct kbase_device *kbdev = data;
+
+	mutex_lock(&kbdev->ipa.lock);
+	kbdev->ipa.force_fallback_model = (val ? true : false);
+	mutex_unlock(&kbdev->ipa.lock);
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(force_fallback_model,
+		force_fallback_model_get,
+		force_fallback_model_set,
+		"%llu\n");
+
 static int current_power_get(void *data, u64 *val)
 {
 	struct kbase_device *kbdev = data;
@@ -195,8 +223,12 @@
 	u32 power;
 
 	kbase_pm_context_active(kbdev);
+	/* The current model assumes that there's no more than one voltage
+	 * regulator currently available in the system.
+	 */
 	kbase_get_real_power(df, &power,
-		kbdev->current_nominal_freq, (kbdev->current_voltage[0] / 1000));
+		kbdev->current_nominal_freq,
+		(kbdev->current_voltages[0] / 1000));
 	kbase_pm_context_idle(kbdev);
 
 	*val = power;
@@ -282,7 +314,9 @@
 	kbase_ipa_model_debugfs_init(kbdev->ipa.fallback_model);
 
 	debugfs_create_file("ipa_current_power", 0444,
-			kbdev->mali_debugfs_directory, kbdev, &current_power);
+		kbdev->mali_debugfs_directory, kbdev, &current_power);
+	debugfs_create_file("ipa_force_fallback_model", 0644,
+		kbdev->mali_debugfs_directory, kbdev, &force_fallback_model);
 
 	mutex_unlock(&kbdev->ipa.lock);
 }
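
The new ipa_force_fallback_model node lets userspace pin the simple fallback model even when no protected-mode switch has occurred. A userspace sketch, assuming debugfs is mounted at /sys/kernel/debug and the device directory has its usual "mali0" name (both depend on the system configuration):

#include <fcntl.h>
#include <unistd.h>

static int set_force_fallback_model(int enable)
{
	const char *path =
		"/sys/kernel/debug/mali0/ipa_force_fallback_model";
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	/* Any non-zero value selects the fallback model */
	if (write(fd, enable ? "1" : "0", 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}
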
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
index e3efed1..852559e 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_simple.c
@@ -27,7 +27,6 @@
 #endif
 #include <linux/of.h>
 #include <linux/delay.h>
-#include <linux/freezer.h>
 #include <linux/kthread.h>
 
 #include "mali_kbase.h"
@@ -133,7 +132,7 @@
 	const s64 res_big = ts[3] * t3    /* +/- 2^62 */
 			  + ts[2] * t2    /* +/- 2^55 */
 			  + ts[1] * t     /* +/- 2^48 */
-			  + ts[0] * 1000; /* +/- 2^41 */
+			  + ts[0] * (s64)1000; /* +/- 2^41 */
 
 	/* Range: -2^60 < res_unclamped < 2^60 */
 	s64 res_unclamped = div_s64(res_big, 1000);
@@ -155,8 +154,6 @@
 	int temp;
 #endif
 
-	set_freezable();
-
 	while (!kthread_should_stop()) {
 		struct thermal_zone_device *tz = READ_ONCE(model_data->gpu_tz);
 
@@ -176,7 +173,6 @@
 		WRITE_ONCE(model_data->current_temperature, temp);
 
 		msleep_interruptible(READ_ONCE(model_data->temperature_poll_interval_ms));
-		try_to_freeze();
 	}
 
 	return 0;
@@ -272,8 +268,9 @@
 							  (void *) model_data,
 							  "mali-simple-power-model-temp-poll");
 	if (IS_ERR(model_data->poll_temperature_thread)) {
+		err = PTR_ERR(model_data->poll_temperature_thread);
 		kfree(model_data);
-		return PTR_ERR(model_data->poll_temperature_thread);
+		return err;
 	}
 
 	err = add_params(model);
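
Besides dropping the freezer hooks, the hunk above fixes a use-after-free on the kthread error path: PTR_ERR() used to be evaluated on a pointer stored inside model_data after model_data had already been freed. The general shape of the fix, with hypothetical names:

	md->thread = kthread_run(poll_fn, md, "poller");
	if (IS_ERR(md->thread)) {
		err = PTR_ERR(md->thread); /* read the code before md is freed */
		kfree(md);
		return err;
	}
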
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
index 699252d..9fae8f1 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,16 +44,23 @@
 	struct kbase_ipa_model_vinstr_data *model_data,
 	u32 offset)
 {
-	u8 *p = model_data->vinstr_buffer;
+	u8 *p = (u8 *)model_data->dump_buf.dump_buf;
 
 	return *(u32 *)&p[offset];
 }
 
 static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
 {
-	if (S64_MAX - a < b)
-		return S64_MAX;
-	return a + b;
+	s64 rtn;
+
+	if (a > 0 && (S64_MAX - a) < b)
+		rtn = S64_MAX;
+	else if (a < 0 && (S64_MIN - a) > b)
+		rtn = S64_MIN;
+	else
+		rtn = a + b;
+
+	return rtn;
 }
 
 s64 kbase_ipa_sum_all_shader_cores(
@@ -118,117 +125,69 @@
 	return counter_value * (s64) coeff;
 }
 
-/**
- * kbase_ipa_gpu_active - Inform IPA that GPU is now active
- * @model_data: Pointer to model data
- *
- * This function may cause vinstr to become active.
- */
-static void kbase_ipa_gpu_active(struct kbase_ipa_model_vinstr_data *model_data)
-{
-	struct kbase_device *kbdev = model_data->kbdev;
-
-	lockdep_assert_held(&kbdev->pm.lock);
-
-	if (!kbdev->ipa.vinstr_active) {
-		kbdev->ipa.vinstr_active = true;
-		kbase_vinstr_resume_client(model_data->vinstr_cli);
-	}
-}
-
-/**
- * kbase_ipa_gpu_idle - Inform IPA that GPU is now idle
- * @model_data: Pointer to model data
- *
- * This function may cause vinstr to become idle.
- */
-static void kbase_ipa_gpu_idle(struct kbase_ipa_model_vinstr_data *model_data)
-{
-	struct kbase_device *kbdev = model_data->kbdev;
-
-	lockdep_assert_held(&kbdev->pm.lock);
-
-	if (kbdev->ipa.vinstr_active) {
-		kbase_vinstr_suspend_client(model_data->vinstr_cli);
-		kbdev->ipa.vinstr_active = false;
-	}
-}
-
 int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
 {
+	int errcode;
 	struct kbase_device *kbdev = model_data->kbdev;
-	struct kbase_ioctl_hwcnt_reader_setup setup;
-	size_t dump_size;
+	struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt;
+	struct kbase_hwcnt_enable_map enable_map;
+	const struct kbase_hwcnt_metadata *metadata =
+		kbase_hwcnt_virtualizer_metadata(hvirt);
 
-	dump_size = kbase_vinstr_dump_size(kbdev);
-	model_data->vinstr_buffer = kzalloc(dump_size, GFP_KERNEL);
-	if (!model_data->vinstr_buffer) {
+	if (!metadata)
+		return -1;
+
+	errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map);
+	if (errcode) {
+		dev_err(kbdev->dev, "Failed to allocate IPA enable map");
+		return errcode;
+	}
+
+	kbase_hwcnt_enable_map_enable_all(&enable_map);
+
+	errcode = kbase_hwcnt_virtualizer_client_create(
+		hvirt, &enable_map, &model_data->hvirt_cli);
+	kbase_hwcnt_enable_map_free(&enable_map);
+	if (errcode) {
+		dev_err(kbdev->dev, "Failed to register IPA with virtualizer");
+		model_data->hvirt_cli = NULL;
+		return errcode;
+	}
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(
+		metadata, &model_data->dump_buf);
+	if (errcode) {
 		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
-		return -1;
+		kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
+		model_data->hvirt_cli = NULL;
+		return errcode;
 	}
 
-	setup.jm_bm = ~0u;
-	setup.shader_bm = ~0u;
-	setup.tiler_bm = ~0u;
-	setup.mmu_l2_bm = ~0u;
-	model_data->vinstr_cli = kbase_vinstr_hwcnt_kernel_setup(kbdev->vinstr_ctx,
-			&setup, model_data->vinstr_buffer);
-	if (!model_data->vinstr_cli) {
-		dev_err(kbdev->dev, "Failed to register IPA with vinstr core");
-		kfree(model_data->vinstr_buffer);
-		model_data->vinstr_buffer = NULL;
-		return -1;
-	}
-
-	kbase_vinstr_hwc_clear(model_data->vinstr_cli);
-
-	kbdev->ipa.gpu_active_callback = kbase_ipa_gpu_active;
-	kbdev->ipa.gpu_idle_callback = kbase_ipa_gpu_idle;
-	kbdev->ipa.model_data = model_data;
-	kbdev->ipa.vinstr_active = false;
-	/* Suspend vinstr, to ensure that the GPU is powered off until there is
-	 * something to execute.
-	 */
-	kbase_vinstr_suspend_client(model_data->vinstr_cli);
-
 	return 0;
 }
 
 void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
 {
-	struct kbase_device *kbdev = model_data->kbdev;
-
-	kbdev->ipa.gpu_active_callback = NULL;
-	kbdev->ipa.gpu_idle_callback = NULL;
-	kbdev->ipa.model_data = NULL;
-	kbdev->ipa.vinstr_active = false;
-
-	if (model_data->vinstr_cli)
-		kbase_vinstr_detach_client(model_data->vinstr_cli);
-
-	model_data->vinstr_cli = NULL;
-	kfree(model_data->vinstr_buffer);
-	model_data->vinstr_buffer = NULL;
+	if (model_data->hvirt_cli) {
+		kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
+		kbase_hwcnt_dump_buffer_free(&model_data->dump_buf);
+		model_data->hvirt_cli = NULL;
+	}
 }
 
 int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
 {
 	struct kbase_ipa_model_vinstr_data *model_data =
 			(struct kbase_ipa_model_vinstr_data *)model->model_data;
-	struct kbase_device *kbdev = model_data->kbdev;
 	s64 energy = 0;
 	size_t i;
 	u64 coeff = 0, coeff_mul = 0;
+	u64 start_ts_ns, end_ts_ns;
 	u32 active_cycles;
 	int err = 0;
 
-	if (!kbdev->ipa.vinstr_active) {
-		err = -ENODATA;
-		goto err0; /* GPU powered off - no counters to collect */
-	}
-
-	err = kbase_vinstr_hwc_dump(model_data->vinstr_cli,
-				    BASE_HWCNT_READER_EVENT_MANUAL);
+	err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli,
+		&start_ts_ns, &end_ts_ns, &model_data->dump_buf);
 	if (err)
 		goto err0;
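
The widened kbase_ipa_add_saturate() above now clamps at both ends of the s64 range rather than only at S64_MAX; a few illustrative inputs:

	kbase_ipa_add_saturate(S64_MAX, 1);  /* clamps to S64_MAX, as before */
	kbase_ipa_add_saturate(S64_MIN, -1); /* clamps to S64_MIN, new case  */
	kbase_ipa_add_saturate(-5, 3);       /* -2: ordinary addition        */

The attach/detach rework otherwise substitutes a hwcnt virtualizer client plus a pre-allocated dump buffer for the old vinstr client, alongside removal of the gpu_active/gpu_idle callbacks that previously suspended and resumed that client.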
 
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
index 0deafae..46e3cd4 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_common.h
@@ -24,6 +24,8 @@
 #define _KBASE_IPA_VINSTR_COMMON_H_
 
 #include "mali_kbase.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
 
 /* Maximum number of IPA groups for an IPA model. */
 #define KBASE_IPA_MAX_GROUP_DEF_NUM  16
@@ -49,8 +51,8 @@
  * @groups_def_num:      Number of elements in the array of IPA groups.
  * @get_active_cycles:   Callback to return number of active cycles during
  *                       counter sample period
- * @vinstr_cli:          vinstr client handle
- * @vinstr_buffer:       buffer to dump hardware counters onto
+ * @hvirt_cli:           hardware counter virtualizer client handle
+ * @dump_buf:            buffer to dump hardware counters onto
  * @reference_voltage:   voltage, in mV, of the operating point used when
  *                       deriving the power model coefficients. Range approx
  *                       0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13
@@ -72,8 +74,8 @@
 	const struct kbase_ipa_group *groups_def;
 	size_t groups_def_num;
 	kbase_ipa_get_active_cycles_callback get_active_cycles;
-	struct kbase_vinstr_client *vinstr_cli;
-	void *vinstr_buffer;
+	struct kbase_hwcnt_virtualizer_client *hvirt_cli;
+	struct kbase_hwcnt_dump_buffer dump_buf;
 	s32 reference_voltage;
 	s32 scaling_factor;
 	s32 min_sample_cycles;
diff --git a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
index 8366033..270b75e 100644
--- a/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
+++ b/drivers/gpu/arm/midgard/ipa/mali_kbase_ipa_vinstr_g7x.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,6 @@
 
 #include "mali_kbase_ipa_vinstr_common.h"
 #include "mali_kbase.h"
-#include "mali_kbase_ipa_debugfs.h"
 
 
 /* Performance counter blocks base offsets */
@@ -43,7 +42,10 @@
 /* SC counter block offsets */
 #define SC_FRAG_ACTIVE             (KBASE_IPA_NR_BYTES_PER_CNT *  4)
 #define SC_EXEC_CORE_ACTIVE        (KBASE_IPA_NR_BYTES_PER_CNT * 26)
+#define SC_EXEC_INSTR_FMA          (KBASE_IPA_NR_BYTES_PER_CNT * 27)
 #define SC_EXEC_INSTR_COUNT        (KBASE_IPA_NR_BYTES_PER_CNT * 28)
+#define SC_EXEC_INSTR_MSG          (KBASE_IPA_NR_BYTES_PER_CNT * 30)
+#define SC_TEX_FILT_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 39)
 #define SC_TEX_COORD_ISSUE         (KBASE_IPA_NR_BYTES_PER_CNT * 40)
 #define SC_TEX_TFCH_NUM_OPERATIONS (KBASE_IPA_NR_BYTES_PER_CNT * 42)
 #define SC_VARY_INSTR              (KBASE_IPA_NR_BYTES_PER_CNT * 49)
@@ -248,7 +250,7 @@
 	},
 };
 
-static const struct kbase_ipa_group ipa_groups_def_tnox[] = {
+static const struct kbase_ipa_group ipa_groups_def_g76[] = {
 	{
 		.name = "gpu_active",
 		.default_value = 122000,
@@ -281,7 +283,7 @@
 	},
 };
 
-static const struct kbase_ipa_group ipa_groups_def_tgox_r1[] = {
+static const struct kbase_ipa_group ipa_groups_def_g52_r1[] = {
 	{
 		.name = "gpu_active",
 		.default_value = 224200,
@@ -314,6 +316,115 @@
 	},
 };
 
+static const struct kbase_ipa_group ipa_groups_def_g51[] = {
+	{
+		.name = "gpu_active",
+		.default_value = 201400,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+	{
+		.name = "exec_instr_count",
+		.default_value = 392700,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_COUNT,
+	},
+	{
+		.name = "vary_instr",
+		.default_value = 274000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_VARY_INSTR,
+	},
+	{
+		.name = "tex_tfch_num_operations",
+		.default_value = 528000,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_TFCH_NUM_OPERATIONS,
+	},
+	{
+		.name = "l2_access",
+		.default_value = 506400,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_g77[] = {
+	{
+		.name = "l2_access",
+		.default_value = 710800,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_msg",
+		.default_value = 2375300,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_MSG,
+	},
+	{
+		.name = "exec_instr_fma",
+		.default_value = 656100,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_FMA,
+	},
+	{
+		.name = "tex_filt_num_operations",
+		.default_value = 318800,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 172800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+static const struct kbase_ipa_group ipa_groups_def_tbex[] = {
+	{
+		.name = "l2_access",
+		.default_value = 599800,
+		.op = kbase_g7x_sum_all_memsys_blocks,
+		.counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+	},
+	{
+		.name = "exec_instr_msg",
+		.default_value = 1830200,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_MSG,
+	},
+	{
+		.name = "exec_instr_fma",
+		.default_value = 407300,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_EXEC_INSTR_FMA,
+	},
+	{
+		.name = "tex_filt_num_operations",
+		.default_value = 224500,
+		.op = kbase_g7x_sum_all_shader_cores,
+		.counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS,
+	},
+	{
+		.name = "gpu_active",
+		.default_value = 153800,
+		.op = kbase_g7x_jm_single_counter,
+		.counter_block_offset = JM_GPU_ACTIVE,
+	},
+};
+
+
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+	const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+		.name = "mali-" #gpu "-power-model", \
+		.init = kbase_ ## init_token ## _power_model_init, \
+		.term = kbase_ipa_vinstr_common_model_term, \
+		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+	}; \
+	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
 #define STANDARD_POWER_MODEL(gpu, reference_voltage) \
 	static int kbase_ ## gpu ## _power_model_init(\
 			struct kbase_ipa_model *model) \
@@ -326,15 +437,20 @@
 				kbase_g7x_get_active_cycles, \
 				(reference_voltage)); \
 	} \
-	struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
-		.name = "mali-" #gpu "-power-model", \
-		.init = kbase_ ## gpu ## _power_model_init, \
-		.term = kbase_ipa_vinstr_common_model_term, \
-		.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
-	}; \
-	KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+	IPA_POWER_MODEL_OPS(gpu, gpu)
+
+#define ALIAS_POWER_MODEL(gpu, as_gpu) \
+	IPA_POWER_MODEL_OPS(gpu, as_gpu)
 
 STANDARD_POWER_MODEL(g71, 800);
 STANDARD_POWER_MODEL(g72, 800);
-STANDARD_POWER_MODEL(tnox, 800);
-STANDARD_POWER_MODEL(tgox_r1, 1000);
+STANDARD_POWER_MODEL(g76, 800);
+STANDARD_POWER_MODEL(g52_r1, 1000);
+STANDARD_POWER_MODEL(g51, 1000);
+STANDARD_POWER_MODEL(g77, 1000);
+STANDARD_POWER_MODEL(tbex, 1000);
+
+/* g52 is an alias of g76 (TNOX) for IPA */
+ALIAS_POWER_MODEL(g52, g76);
+/* tnax is an alias of g77 (TTRX) for IPA */
+ALIAS_POWER_MODEL(tnax, g77);
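
The IPA_POWER_MODEL_OPS()/ALIAS_POWER_MODEL() split lets two GPUs share one init routine while keeping distinct model names. For reference, ALIAS_POWER_MODEL(g52, g76) above expands (after token and string pasting) to:

const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops = {
	.name = "mali-g52-power-model",
	.init = kbase_g76_power_model_init,
	.term = kbase_ipa_vinstr_common_model_term,
	.get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff,
};
KBASE_EXPORT_TEST_API(kbase_g52_ipa_model_ops);

so the g52 entry resolves to the same coefficients and callbacks as g76, matching the "g52 aliased to g76" comment in kbase_ipa_model_name_from_id().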
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
index 10da0c5..3d24972 100644
--- a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,9 @@
 	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_TLS_HASHING,
 	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
 	BASE_HW_FEATURE_END
 };
 
@@ -66,120 +68,6 @@
 	BASE_HW_FEATURE_END
 };
 
-static const enum base_hw_feature base_hw_features_t60x[] = {
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_V4,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_t62x[] = {
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_V4,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_t72x[] = {
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_V4,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_t76x[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_tFxx[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_t83x[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_t82x[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
-	BASE_HW_FEATURE_END
-};
-
 static const enum base_hw_feature base_hw_features_tMIx[] = {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
@@ -203,6 +91,7 @@
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -230,6 +119,7 @@
 	BASE_HW_FEATURE_PROTECTED_MODE,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -257,6 +147,7 @@
 	BASE_HW_FEATURE_PROTECTED_MODE,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -284,6 +175,7 @@
 	BASE_HW_FEATURE_PROTECTED_MODE,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_END
 };
 
@@ -313,7 +205,7 @@
 	BASE_HW_FEATURE_COHERENCY_REG,
 	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_TLS_HASHING,
-	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_END
 };
 
@@ -343,34 +235,7 @@
 	BASE_HW_FEATURE_COHERENCY_REG,
 	BASE_HW_FEATURE_AARCH64_MMU,
 	BASE_HW_FEATURE_TLS_HASHING,
-	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
-	BASE_HW_FEATURE_END
-};
-
-static const enum base_hw_feature base_hw_features_tKAx[] = {
-	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
-	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
-	BASE_HW_FEATURE_XAFFINITY,
-	BASE_HW_FEATURE_WARPING,
-	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
-	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
-	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
-	BASE_HW_FEATURE_BRNDOUT_CC,
-	BASE_HW_FEATURE_BRNDOUT_KILL,
-	BASE_HW_FEATURE_LD_ST_LEA_TEX,
-	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
-	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
-	BASE_HW_FEATURE_MRT,
-	BASE_HW_FEATURE_MSAA_16X,
-	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
-	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
-	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
-	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_FLUSH_REDUCTION,
-	BASE_HW_FEATURE_PROTECTED_MODE,
-	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
-	BASE_HW_FEATURE_COHERENCY_REG,
-	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
 	BASE_HW_FEATURE_END
 };
 
@@ -398,10 +263,12 @@
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
 	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_END
 };
 
-static const enum base_hw_feature base_hw_features_tBOx[] = {
+static const enum base_hw_feature base_hw_features_tNAx[] = {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -425,10 +292,12 @@
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
 	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_END
 };
 
-static const enum base_hw_feature base_hw_features_tEGx[] = {
+static const enum base_hw_feature base_hw_features_tBEx[] = {
 	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
 	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
 	BASE_HW_FEATURE_XAFFINITY,
@@ -447,14 +316,160 @@
 	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
 	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
 	BASE_HW_FEATURE_TEST4_DATUM_MODE,
-	BASE_HW_FEATURE_THREAD_GROUP_SPLIT,
 	BASE_HW_FEATURE_FLUSH_REDUCTION,
 	BASE_HW_FEATURE_PROTECTED_MODE,
 	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
 	BASE_HW_FEATURE_COHERENCY_REG,
 	BASE_HW_FEATURE_AARCH64_MMU,
-	BASE_HW_FEATURE_TLS_HASHING,
-	BASE_HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tULx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tDUx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_IDVS_GROUP_SIZE,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tODx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tIDx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
+	BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tVAx[] = {
+	BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+	BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+	BASE_HW_FEATURE_XAFFINITY,
+	BASE_HW_FEATURE_WARPING,
+	BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+	BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+	BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+	BASE_HW_FEATURE_BRNDOUT_CC,
+	BASE_HW_FEATURE_BRNDOUT_KILL,
+	BASE_HW_FEATURE_LD_ST_LEA_TEX,
+	BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+	BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+	BASE_HW_FEATURE_MRT,
+	BASE_HW_FEATURE_MSAA_16X,
+	BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+	BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+	BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+	BASE_HW_FEATURE_TEST4_DATUM_MODE,
+	BASE_HW_FEATURE_FLUSH_REDUCTION,
+	BASE_HW_FEATURE_PROTECTED_MODE,
+	BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
+	BASE_HW_FEATURE_COHERENCY_REG,
+	BASE_HW_FEATURE_AARCH64_MMU,
+	BASE_HW_FEATURE_L2_CONFIG,
+	BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
 	BASE_HW_FEATURE_END
 };
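
Each per-GPU table above is terminated by BASE_HW_FEATURE_END, so feature queries reduce to a linear scan. A hypothetical helper (not taken from the driver) showing how such a table would be consumed:

static bool hw_features_contain(const enum base_hw_feature *features,
				enum base_hw_feature feature)
{
	for (; *features != BASE_HW_FEATURE_END; features++) {
		if (*features == feature)
			return true;
	}
	return false;
}

For example, hw_features_contain(base_hw_features_tNAx, BASE_HW_FEATURE_L2_CONFIG) is false, while the same query against base_hw_features_tBEx is true, per the tables above.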
 
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
index 19ffd69..7448608 100644
--- a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -91,8 +91,6 @@
 	BASE_HW_ISSUE_10984,
 	BASE_HW_ISSUE_10995,
 	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
 	BASE_HW_ISSUE_11035,
 	BASE_HW_ISSUE_11042,
 	BASE_HW_ISSUE_11051,
@@ -124,9 +122,16 @@
 	BASE_HW_ISSUE_TMIX_8456,
 	GPUCORE_1619,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TMIX_8438,
 	BASE_HW_ISSUE_TNOX_1194,
 	BASE_HW_ISSUE_TGOX_R1_1234,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TSIX_1792,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -134,844 +139,6 @@
 	BASE_HW_ISSUE_END
 };
 
-static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
-	BASE_HW_ISSUE_6367,
-	BASE_HW_ISSUE_6398,
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_6787,
-	BASE_HW_ISSUE_7027,
-	BASE_HW_ISSUE_7144,
-	BASE_HW_ISSUE_7304,
-	BASE_HW_ISSUE_8073,
-	BASE_HW_ISSUE_8186,
-	BASE_HW_ISSUE_8215,
-	BASE_HW_ISSUE_8245,
-	BASE_HW_ISSUE_8250,
-	BASE_HW_ISSUE_8260,
-	BASE_HW_ISSUE_8280,
-	BASE_HW_ISSUE_8316,
-	BASE_HW_ISSUE_8381,
-	BASE_HW_ISSUE_8394,
-	BASE_HW_ISSUE_8401,
-	BASE_HW_ISSUE_8408,
-	BASE_HW_ISSUE_8443,
-	BASE_HW_ISSUE_8456,
-	BASE_HW_ISSUE_8564,
-	BASE_HW_ISSUE_8634,
-	BASE_HW_ISSUE_8778,
-	BASE_HW_ISSUE_8791,
-	BASE_HW_ISSUE_8833,
-	BASE_HW_ISSUE_8896,
-	BASE_HW_ISSUE_8975,
-	BASE_HW_ISSUE_8986,
-	BASE_HW_ISSUE_8987,
-	BASE_HW_ISSUE_9010,
-	BASE_HW_ISSUE_9418,
-	BASE_HW_ISSUE_9423,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_9510,
-	BASE_HW_ISSUE_9566,
-	BASE_HW_ISSUE_9630,
-	BASE_HW_ISSUE_10410,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10487,
-	BASE_HW_ISSUE_10607,
-	BASE_HW_ISSUE_10632,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10676,
-	BASE_HW_ISSUE_10682,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_10969,
-	BASE_HW_ISSUE_10984,
-	BASE_HW_ISSUE_10995,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11035,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_3964,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
-	BASE_HW_ISSUE_6367,
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_6787,
-	BASE_HW_ISSUE_7027,
-	BASE_HW_ISSUE_7304,
-	BASE_HW_ISSUE_8408,
-	BASE_HW_ISSUE_8564,
-	BASE_HW_ISSUE_8778,
-	BASE_HW_ISSUE_8975,
-	BASE_HW_ISSUE_9010,
-	BASE_HW_ISSUE_9418,
-	BASE_HW_ISSUE_9423,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_9510,
-	BASE_HW_ISSUE_10410,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10487,
-	BASE_HW_ISSUE_10607,
-	BASE_HW_ISSUE_10632,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10676,
-	BASE_HW_ISSUE_10682,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_10969,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11035,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
-	BASE_HW_ISSUE_6367,
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_6787,
-	BASE_HW_ISSUE_7027,
-	BASE_HW_ISSUE_7304,
-	BASE_HW_ISSUE_8408,
-	BASE_HW_ISSUE_8564,
-	BASE_HW_ISSUE_8778,
-	BASE_HW_ISSUE_8975,
-	BASE_HW_ISSUE_9010,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_9510,
-	BASE_HW_ISSUE_10410,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10487,
-	BASE_HW_ISSUE_10607,
-	BASE_HW_ISSUE_10632,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10676,
-	BASE_HW_ISSUE_10682,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11035,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10127,
-	BASE_HW_ISSUE_10327,
-	BASE_HW_ISSUE_10410,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10487,
-	BASE_HW_ISSUE_10607,
-	BASE_HW_ISSUE_10632,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10676,
-	BASE_HW_ISSUE_10682,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10817,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_10959,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11035,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_10959,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_10959,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_26,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3542,
-	BASE_HW_ISSUE_T76X_3556,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_26,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3542,
-	BASE_HW_ISSUE_T76X_3556,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_26,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3542,
-	BASE_HW_ISSUE_T76X_3556,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_26,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3542,
-	BASE_HW_ISSUE_T76X_3556,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_26,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3542,
-	BASE_HW_ISSUE_T76X_3556,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10797,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10797,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T720_1386,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10684,
-	BASE_HW_ISSUE_10797,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_11056,
-	BASE_HW_ISSUE_T720_1386,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_t72x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10471,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10797,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_t76x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_t60x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_8778,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_t62x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_6402,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10472,
-	BASE_HW_ISSUE_10649,
-	BASE_HW_ISSUE_10931,
-	BASE_HW_ISSUE_11012,
-	BASE_HW_ISSUE_11020,
-	BASE_HW_ISSUE_11024,
-	BASE_HW_ISSUE_11042,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3964,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3966,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_t86x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_TMIX_7891,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T720_1386,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_T83X_817,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T720_1386,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_T83X_817,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_t83x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_T83X_817,
-	BASE_HW_ISSUE_TMIX_7891,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T720_1386,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3964,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_T83X_817,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T720_1386,
-	BASE_HW_ISSUE_T76X_1909,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_T83X_817,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_10821,
-	BASE_HW_ISSUE_10883,
-	BASE_HW_ISSUE_10946,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_11054,
-	BASE_HW_ISSUE_T720_1386,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3953,
-	BASE_HW_ISSUE_T76X_3960,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_T83X_817,
-	BASE_HW_ISSUE_TMIX_7891,
-	BASE_HW_ISSUE_TMIX_8438,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_t82x[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_11051,
-	BASE_HW_ISSUE_T76X_1963,
-	BASE_HW_ISSUE_T76X_3086,
-	BASE_HW_ISSUE_T76X_3700,
-	BASE_HW_ISSUE_T76X_3793,
-	BASE_HW_ISSUE_T76X_3979,
-	BASE_HW_ISSUE_T83X_817,
-	BASE_HW_ISSUE_TMIX_7891,
-	GPUCORE_1619,
-	BASE_HW_ISSUE_END
-};
-
 static const enum base_hw_issue base_hw_issues_tMIx_r0p0_05dev0[] = {
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_10682,
@@ -986,6 +153,9 @@
 	BASE_HW_ISSUE_TMIX_8463,
 	BASE_HW_ISSUE_TMIX_8456,
 	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1003,6 +173,9 @@
 	BASE_HW_ISSUE_TMIX_8463,
 	BASE_HW_ISSUE_TMIX_8456,
 	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1020,6 +193,9 @@
 	BASE_HW_ISSUE_TMIX_8463,
 	BASE_HW_ISSUE_TMIX_8456,
 	BASE_HW_ISSUE_TMIX_8438,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1034,6 +210,7 @@
 	BASE_HW_ISSUE_TMIX_8206,
 	BASE_HW_ISSUE_TMIX_8343,
 	BASE_HW_ISSUE_TMIX_8456,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_END
 };
 
@@ -1044,6 +221,9 @@
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8042,
 	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1054,6 +234,9 @@
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8042,
 	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1064,6 +247,9 @@
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8042,
 	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1073,6 +259,9 @@
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8042,
 	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1082,6 +271,7 @@
 	BASE_HW_ISSUE_TMIX_7891,
 	BASE_HW_ISSUE_TMIX_8042,
 	BASE_HW_ISSUE_TMIX_8133,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_END
 };
 
@@ -1090,6 +280,10 @@
 	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TSIX_1792,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1098,6 +292,10 @@
 	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TSIX_1792,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1106,6 +304,9 @@
 	BASE_HW_ISSUE_11054,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1113,6 +314,9 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1121,6 +325,7 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_END
 };
 
@@ -1128,6 +333,9 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1136,6 +344,7 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_END
 };
 
@@ -1143,7 +352,10 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1152,6 +364,7 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_END
 };
 
@@ -1159,7 +372,10 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TNOX_1194,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1167,7 +383,10 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_TGOX_R1_1234,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
@@ -1176,66 +395,170 @@
 	BASE_HW_ISSUE_9435,
 	BASE_HW_ISSUE_TMIX_8133,
 	BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_tKAx_r0p0[] = {
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
-	BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_tKAx[] = {
-	BASE_HW_ISSUE_5736,
-	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
 	BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_tTRx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tTRx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
 static const enum base_hw_issue base_hw_issues_model_tTRx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_END
 };
 
-static const enum base_hw_issue base_hw_issues_tBOx_r0p0[] = {
+static const enum base_hw_issue base_hw_issues_tNAx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
 	BASE_HW_ISSUE_END
 };
 
-static const enum base_hw_issue base_hw_issues_model_tBOx[] = {
+static const enum base_hw_issue base_hw_issues_tNAx_r0p1[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_3076,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_GPU2017_1336,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tNAx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_END
 };
 
-static const enum base_hw_issue base_hw_issues_tEGx_r0p0[] = {
+static const enum base_hw_issue base_hw_issues_tBEx_r0p0[] = {
 	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_921,
 	BASE_HW_ISSUE_END
 };
 
-static const enum base_hw_issue base_hw_issues_model_tEGx[] = {
+static const enum base_hw_issue base_hw_issues_tBEx_r1p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tBEx[] = {
 	BASE_HW_ISSUE_5736,
 	BASE_HW_ISSUE_9435,
-	BASE_HW_ISSUE_TMIX_8133,
-	BASE_HW_ISSUE_TSIX_1116,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tULx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tULx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tDUx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_TTRX_921,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tDUx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tODx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tIDx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tIDx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
+	BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
+	BASE_HW_ISSUE_5736,
+	BASE_HW_ISSUE_9435,
+	BASE_HW_ISSUE_TSIX_2033,
+	BASE_HW_ISSUE_TTRX_1337,
 	BASE_HW_ISSUE_END
 };
 
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
index cc44ff2..a8ab408 100644
--- a/drivers/gpu/arm/midgard/mali_base_kernel.h
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,6 @@
 } base_mem_handle;
 
 #include "mali_base_mem_priv.h"
-#include "mali_kbase_profiling_gator_api.h"
 #include "mali_midg_coherency.h"
 #include "mali_kbase_gpu_id.h"
 
@@ -87,6 +86,14 @@
  * @{
  */
 
+/* Physical memory group ID for normal usage.
+ */
+#define BASE_MEM_GROUP_DEFAULT (0)
+
+/* Number of physical memory groups.
+ */
+#define BASE_MEM_GROUP_COUNT (16)
+
 /**
  * typedef base_mem_alloc_flags - Memory allocation, access/hint flags.
  *
@@ -130,7 +137,7 @@
 /* Will be permanently mapped in kernel space.
  * Flag is only allowed on allocations originating from kbase.
  */
-#define BASE_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
+#define BASEP_MEM_PERMANENT_KERNEL_MAPPING ((base_mem_alloc_flags)1 << 5)
 
 /* The allocation will completely reside within the same 4GB chunk in the GPU
  * virtual space.
@@ -140,7 +147,11 @@
  */
 #define BASE_MEM_GPU_VA_SAME_4GB_PAGE ((base_mem_alloc_flags)1 << 6)
 
-#define BASE_MEM_RESERVED_BIT_7 ((base_mem_alloc_flags)1 << 7)
+/* Userspace is not allowed to free this memory.
+ * Flag is only allowed on allocations originating from kbase.
+ */
+#define BASEP_MEM_NO_USER_FREE ((base_mem_alloc_flags)1 << 7)
+
 #define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
 
 /* Grow backing store on GPU Page Fault
@@ -174,9 +185,9 @@
  */
 #define BASE_MEM_COHERENT_SYSTEM_REQUIRED ((base_mem_alloc_flags)1 << 15)
 
-/* Secure memory
+/* Protected memory
  */
-#define BASE_MEM_SECURE ((base_mem_alloc_flags)1 << 16)
+#define BASE_MEM_PROTECTED ((base_mem_alloc_flags)1 << 16)
 
 /* Not needed physical memory
  */
@@ -211,11 +222,24 @@
  */
 #define BASE_MEM_UNCACHED_GPU ((base_mem_alloc_flags)1 << 21)
 
-/* Number of bits used as flags for base memory management
+/*
+ * Bits [22:25] for group_id (0~15).
+ *
+ * base_mem_group_id_set() should be used to pack a memory group ID into a
+ * base_mem_alloc_flags value instead of accessing the bits directly.
+ * base_mem_group_id_get() should be used to extract the memory group ID from
+ * a base_mem_alloc_flags value.
+ */
+#define BASEP_MEM_GROUP_ID_SHIFT 22
+#define BASE_MEM_GROUP_ID_MASK \
+	((base_mem_alloc_flags)0xF << BASEP_MEM_GROUP_ID_SHIFT)
+
+/**
+ * Number of bits used as flags for base memory management
  *
  * Must be kept in sync with the base_mem_alloc_flags flags
  */
-#define BASE_MEM_FLAGS_NR_BITS 22
+#define BASE_MEM_FLAGS_NR_BITS 26
 
 /* A mask for all output bits, excluding IN/OUT bits.
  */
@@ -226,6 +250,43 @@
 #define BASE_MEM_FLAGS_INPUT_MASK \
 	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
 
+/**
+ * base_mem_group_id_get() - Get group ID from flags
+ * @flags: Flags to pass to base_mem_alloc
+ *
+ * This inline function extracts the encoded group ID from flags
+ * and converts it into a numeric value (0~15).
+ *
+ * Return: group ID (0~15) extracted from the parameter
+ */
+static inline int base_mem_group_id_get(base_mem_alloc_flags flags)
+{
+	LOCAL_ASSERT((flags & ~BASE_MEM_FLAGS_INPUT_MASK) == 0);
+	return (int)((flags & BASE_MEM_GROUP_ID_MASK) >>
+			BASEP_MEM_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_mem_group_id_set() - Set group ID into base_mem_alloc_flags
+ * @id: group ID (0~15) to encode
+ *
+ * This inline function encodes a specific group ID into base_mem_alloc_flags.
+ * Parameter 'id' should lie between 0 and 15.
+ *
+ * Return: base_mem_alloc_flags with the group ID (id) encoded
+ *
+ * The return value can be combined with other flags against base_mem_alloc
+ * to identify a specific memory group.
+ */
+static inline base_mem_alloc_flags base_mem_group_id_set(int id)
+{
+	LOCAL_ASSERT(id >= 0);
+	LOCAL_ASSERT(id < BASE_MEM_GROUP_COUNT);
+
+	return ((base_mem_alloc_flags)id << BASEP_MEM_GROUP_ID_SHIFT) &
+		BASE_MEM_GROUP_ID_MASK;
+}
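
For reference, a minimal usage sketch of the two helpers above. It assumes the
BASE_MEM_PROT_* allocation flags defined elsewhere in this header, and the
group ID value is arbitrary:

    /* Illustrative only: allocate from physical memory group 3. */
    base_mem_alloc_flags flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD;

    flags |= base_mem_group_id_set(3);                /* packed into bits [22:25] */
    LOCAL_ASSERT(base_mem_group_id_get(flags) == 3);  /* decodes back to 3 */
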
+
 /* A mask for all the flags which are modifiable via the base_mem_set_flags
  * interface.
  */
@@ -237,13 +298,13 @@
 /* A mask of all currently reserved flags
  */
 #define BASE_MEM_FLAGS_RESERVED \
-	(BASE_MEM_RESERVED_BIT_7 | BASE_MEM_RESERVED_BIT_8 | \
-		BASE_MEM_MAYBE_RESERVED_BIT_19)
+	(BASE_MEM_RESERVED_BIT_8 | BASE_MEM_MAYBE_RESERVED_BIT_19)
 
 /* A mask of all the flags which are only valid for allocations within kbase,
  * and may not be passed from user space.
  */
-#define BASE_MEM_FLAGS_KERNEL_ONLY (BASE_MEM_PERMANENT_KERNEL_MAPPING)
+#define BASEP_MEM_FLAGS_KERNEL_ONLY \
+	(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
 
 /* A mask of all the flags that can be returned via the base_mem_get_flags()
  * interface.
@@ -252,7 +313,7 @@
 	(BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_SAME_VA | \
 		BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_DONT_NEED | \
 		BASE_MEM_IMPORT_SHARED | BASE_MEM_FLAGS_RESERVED | \
-		BASE_MEM_FLAGS_KERNEL_ONLY))
+		BASEP_MEM_FLAGS_KERNEL_ONLY))
 
 /**
  * enum base_mem_import_type - Memory types supported by @a base_mem_import
@@ -320,7 +381,7 @@
 #define BASE_MEM_TRACE_BUFFER_HANDLE           (2ull  << 12)
 #define BASE_MEM_MAP_TRACKING_HANDLE           (3ull  << 12)
 #define BASEP_MEM_WRITE_ALLOC_PAGES_HANDLE     (4ull  << 12)
-/* reserved handles ..-48<<PAGE_SHIFT> for future special handles */
+/* reserved handles ..-47<<PAGE_SHIFT> for future special handles */
 #define BASE_MEM_COOKIE_BASE                   (64ul  << 12)
 #define BASE_MEM_FIRST_FREE_ADDRESS            ((BITS_PER_LONG << 12) + \
 						BASE_MEM_COOKIE_BASE)
@@ -349,15 +410,6 @@
 
 
 /**
- * @brief Result codes of changing the size of the backing store allocated to a tmem region
- */
-typedef enum base_backing_threshold_status {
-	BASE_BACKING_THRESHOLD_OK = 0,			    /**< Resize successful */
-	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		    /**< Increase failed due to an out-of-memory condition */
-	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
-} base_backing_threshold_status;
-
-/**
  * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
  * @{
  */
@@ -609,43 +661,8 @@
 #define BASE_JD_REQ_SOFT_FENCE_TRIGGER          (BASE_JD_REQ_SOFT_JOB | 0x2)
 #define BASE_JD_REQ_SOFT_FENCE_WAIT             (BASE_JD_REQ_SOFT_JOB | 0x3)
 
-/**
- * SW Only requirement : Replay job.
- *
- * If the preceding job fails, the replay job will cause the jobs specified in
- * the list of base_jd_replay_payload pointed to by the jc pointer to be
- * replayed.
- *
- * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
- * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
- * job is failed, as well as any following dependencies.
- *
- * The replayed jobs will require a number of atom IDs. If there are not enough
- * free atom IDs then the replay job will fail.
- *
- * If the preceding job does not fail, then the replay job is returned as
- * completed.
- *
- * The replayed jobs will never be returned to userspace. The preceding failed
- * job will be returned to userspace as failed; the status of this job should
- * be ignored. Completion should be determined by the status of the replay soft
- * job.
- *
- * In order for the jobs to be replayed, the job headers will have to be
- * modified. The Status field will be reset to NOT_STARTED. If the Job Type
- * field indicates a Vertex Shader Job then it will be changed to Null Job.
- *
- * The replayed jobs have the following assumptions :
- *
- * - No external resources. Any required external resources will be held by the
- *   replay atom.
- * - Pre-dependencies are created based on job order.
- * - Atom numbers are automatically assigned.
- * - device_nr is set to 0. This is not relevant as
- *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
- * - Priority is inherited from the replay job.
- */
-#define BASE_JD_REQ_SOFT_REPLAY                 (BASE_JD_REQ_SOFT_JOB | 0x4)
+/* 0x4 RESERVED for now */
+
 /**
  * SW only requirement: event wait/trigger job.
  *
@@ -766,6 +783,14 @@
 #define BASE_JD_REQ_SKIP_CACHE_END ((base_jd_core_req)1 << 16)
 
 /**
+ * Request the atom be executed on a specific job slot.
+ *
+ * When this flag is specified, it takes precedence over any existing job slot
+ * selection logic.
+ */
+#define BASE_JD_REQ_JOB_SLOT ((base_jd_core_req)1 << 17)
+
+/**
  * These requirement bits are currently unused in base_jd_core_req
  */
 #define BASEP_JD_REQ_RESERVED \
@@ -774,7 +799,8 @@
 	BASE_JD_REQ_EVENT_COALESCE | \
 	BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP | \
 	BASE_JD_REQ_FS_AFBC | BASE_JD_REQ_PERMON | \
-	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END))
+	BASE_JD_REQ_SKIP_CACHE_START | BASE_JD_REQ_SKIP_CACHE_END | \
+	BASE_JD_REQ_JOB_SLOT))
 
 /**
  * Mask of all bits in base_jd_core_req that control the type of the atom.
@@ -798,24 +824,6 @@
 	((core_req & BASE_JD_REQ_SOFT_JOB) || \
 	(core_req & BASE_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP)
 
-/**
- * enum kbase_atom_coreref_state - States to model state machine processed by
- * kbasep_js_job_check_ref_cores(), which handles retaining cores for power
- * management.
- *
- * @KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: Starting state: Cores must be
- * requested.
- * @KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: Cores requested, but
- * waiting for them to be powered
- * @KBASE_ATOM_COREREF_STATE_READY: Cores are powered, atom can be submitted to
- * HW
- */
-enum kbase_atom_coreref_state {
-	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
-	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
-	KBASE_ATOM_COREREF_STATE_READY
-};
-
 /*
  * Base Atom priority
  *
@@ -823,15 +831,16 @@
  * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
  * level that is not one of those defined below.
  *
- * Priority levels only affect scheduling between atoms of the same type within
- * a base context, and only after the atoms have had dependencies resolved.
- * Fragment atoms does not affect non-frament atoms with lower priorities, and
- * the other way around. For example, a low priority atom that has had its
- * dependencies resolved might run before a higher priority atom that has not
- * had its dependencies resolved.
+ * Priority levels only affect scheduling after the atoms have had dependencies
+ * resolved. For example, a low priority atom that has had its dependencies
+ * resolved might run before a higher priority atom that has not had its
+ * dependencies resolved.
  *
- * The scheduling between base contexts/processes and between atoms from
- * different base contexts/processes is unaffected by atom priority.
+ * In general, fragment atoms do not affect non-fragment atoms with
+ * lower priorities, and vice versa. One exception is that there is only one
+ * priority value for each context. So, for example, a high-priority fragment
+ * atom could raise its context's priority, causing its non-fragment atoms to
+ * be scheduled sooner as well.
  *
  * The atoms are scheduled as follows with respect to their priorities:
  * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
@@ -843,6 +852,14 @@
  * - Any two atoms that have the same priority could run in any order with
  *   respect to each other. That is, there is no ordering constraint between
  *   atoms of the same priority.
+ *
+ * The sysfs file 'js_ctx_scheduling_mode' is used to control how atoms are
+ * scheduled between contexts. The default value, 0, will cause higher-priority
+ * atoms to be scheduled first, regardless of their context. The value 1 will
+ * use a round-robin algorithm when deciding which context's atoms to schedule
+ * next, so higher-priority atoms can only preempt lower priority atoms within
+ * the same context. See KBASE_JS_SYSTEM_PRIORITY_MODE and
+ * KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE for more details.
  */
 typedef u8 base_jd_prio;
 
@@ -897,7 +914,7 @@
 	base_atom_id atom_number;	    /**< unique number to identify the atom */
 	base_jd_prio prio;                  /**< Atom priority. Refer to @ref base_jd_prio for more details */
 	u8 device_nr;			    /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
-	u8 padding[1];
+	u8 jobslot;			    /**< Job slot to use when BASE_JD_REQ_JOB_SLOT is specified */
 	base_jd_core_req core_req;          /**< core requirements */
 } base_jd_atom_v2;
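
A hedged sketch of how a submitter might use the new jobslot field together
with BASE_JD_REQ_JOB_SLOT (BASE_JD_REQ_FS is assumed from this header; the
slot number is arbitrary):

    struct base_jd_atom_v2 atom = { 0 };

    /* Pin this fragment atom to job slot 1; the field is only honoured
     * because BASE_JD_REQ_JOB_SLOT is set in core_req.
     */
    atom.core_req = BASE_JD_REQ_FS | BASE_JD_REQ_JOB_SLOT;
    atom.jobslot  = 1;
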
 
@@ -1181,7 +1198,6 @@
 	BASE_JD_EVENT_JOB_CANCELLED	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
 	BASE_JD_EVENT_JOB_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
 	BASE_JD_EVENT_PM_EVENT		= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
-	BASE_JD_EVENT_FORCE_REPLAY	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
 
 	BASE_JD_EVENT_BAG_INVALID	= BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
 
@@ -1644,20 +1660,27 @@
 	((base_context_create_flags)1 << 1)
 
 
-/**
- * Bitpattern describing the ::base_context_create_flags that can be
- * passed to base_context_init()
+/* Bit-shift used to encode a memory group ID in base_context_create_flags
  */
-#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
-	(BASE_CONTEXT_CCTX_EMBEDDED | \
-	 BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED)
+#define BASEP_CONTEXT_MMU_GROUP_ID_SHIFT (3)
 
-/**
- * Bitpattern describing the ::base_context_create_flags that can be
+/* Bitmask used to encode a memory group ID in base_context_create_flags
+ */
+#define BASEP_CONTEXT_MMU_GROUP_ID_MASK \
+	((base_context_create_flags)0xF << BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+
+/* Bitpattern describing the base_context_create_flags that can be
  * passed to the kernel
  */
-#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
-	BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED
+#define BASEP_CONTEXT_CREATE_KERNEL_FLAGS \
+	(BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED | \
+	 BASEP_CONTEXT_MMU_GROUP_ID_MASK)
+
+/* Bitpattern describing the ::base_context_create_flags that can be
+ * passed to base_context_init()
+ */
+#define BASEP_CONTEXT_CREATE_ALLOWED_FLAGS \
+	(BASE_CONTEXT_CCTX_EMBEDDED | BASEP_CONTEXT_CREATE_KERNEL_FLAGS)
 
 /*
  * Private flags used on the base context
@@ -1668,7 +1691,46 @@
  * not collide with them.
  */
 /** Private flag tracking whether job descriptor dumping is disabled */
-#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED ((u32)(1 << 31))
+#define BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED \
+	((base_context_create_flags)(1 << 31))
+
+/**
+ * base_context_mmu_group_id_set - Encode a memory group ID in
+ *                                 base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the specified group.
+ *
+ * @group_id: Physical memory group ID. Range is 0..(BASE_MEM_GROUP_COUNT-1).
+ *
+ * Return: Bitmask of flags to pass to base_context_init.
+ */
+static inline base_context_create_flags base_context_mmu_group_id_set(
+	int const group_id)
+{
+	LOCAL_ASSERT(group_id >= 0);
+	LOCAL_ASSERT(group_id < BASE_MEM_GROUP_COUNT);
+	return BASEP_CONTEXT_MMU_GROUP_ID_MASK &
+		((base_context_create_flags)group_id <<
+		BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
+
+/**
+ * base_context_mmu_group_id_get - Decode a memory group ID from
+ *                                 base_context_create_flags
+ *
+ * Memory allocated for GPU page tables will come from the returned group.
+ *
+ * @flags: Bitmask of flags to pass to base_context_init.
+ *
+ * Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
+ */
+static inline int base_context_mmu_group_id_get(
+	base_context_create_flags const flags)
+{
+	LOCAL_ASSERT(flags == (flags & BASEP_CONTEXT_CREATE_ALLOWED_FLAGS));
+	return (int)((flags & BASEP_CONTEXT_MMU_GROUP_ID_MASK) >>
+			BASEP_CONTEXT_MMU_GROUP_ID_SHIFT);
+}
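
A minimal sketch of composing context-creation flags with the two helpers
above, using only names defined in this header (the group number is
arbitrary):

    /* Disable System Monitor submission and take GPU page tables from group 2. */
    base_context_create_flags flags =
        BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED |
        base_context_mmu_group_id_set(2);

    LOCAL_ASSERT(base_context_mmu_group_id_get(flags) == 2);
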
 
 /** @} end group base_user_api_core */
 
@@ -1695,82 +1757,8 @@
  * @{
  */
 
-/**
- * @brief The payload for a replay job. This must be in GPU memory.
- */
-typedef struct base_jd_replay_payload {
-	/**
-	 * Pointer to the first entry in the base_jd_replay_jc list.  These
-	 * will be replayed in @b reverse order (so that extra ones can be added
-	 * to the head in future soft jobs without affecting this soft job)
-	 */
-	u64 tiler_jc_list;
-
-	/**
-	 * Pointer to the fragment job chain.
-	 */
-	u64 fragment_jc;
-
-	/**
-	 * Pointer to the tiler heap free FBD field to be modified.
-	 */
-	u64 tiler_heap_free;
-
-	/**
-	 * Hierarchy mask for the replayed fragment jobs. May be zero.
-	 */
-	u16 fragment_hierarchy_mask;
-
-	/**
-	 * Hierarchy mask for the replayed tiler jobs. May be zero.
-	 */
-	u16 tiler_hierarchy_mask;
-
-	/**
-	 * Default weight to be used for hierarchy levels not in the original
-	 * mask.
-	 */
-	u32 hierarchy_default_weight;
-
-	/**
-	 * Core requirements for the tiler job chain
-	 */
-	base_jd_core_req tiler_core_req;
-
-	/**
-	 * Core requirements for the fragment job chain
-	 */
-	base_jd_core_req fragment_core_req;
-} base_jd_replay_payload;
-
-/**
- * @brief An entry in the linked list of job chains to be replayed. This must
- *        be in GPU memory.
- */
-typedef struct base_jd_replay_jc {
-	/**
-	 * Pointer to next entry in the list. A setting of NULL indicates the
-	 * end of the list.
-	 */
-	u64 next;
-
-	/**
-	 * Pointer to the job chain.
-	 */
-	u64 jc;
-
-} base_jd_replay_jc;
-
-/* Maximum number of jobs allowed in a fragment chain in the payload of a
- * replay job */
-#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
-
 /** @} end group base_api */
 
-typedef struct base_profiling_controls {
-	u32 profiling_controls[FBDUMP_CONTROL_MAX];
-} base_profiling_controls;
-
 /* Enable additional tracepoints for latency measurements (TL_ATOM_READY,
  * TL_ATOM_DONE, TL_ATOM_PRIO_CHANGE, TL_ATOM_EVENT_POST) */
 #define BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS (1 << 0)
@@ -1782,5 +1770,23 @@
 #define BASE_TLSTREAM_FLAGS_MASK (BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS | \
 		BASE_TLSTREAM_JOB_DUMPING_ENABLED)
 
+/**
+ * A number of bit flags are defined for requesting cpu_gpu_timeinfo. These
+ * flags are also used, where applicable, for specifying which fields
+ * are valid following the request operation.
+ */
+
+/* For monotonic (counter) timefield */
+#define BASE_TIMEINFO_MONOTONIC_FLAG (1UL << 0)
+/* For system wide timestamp */
+#define BASE_TIMEINFO_TIMESTAMP_FLAG (1UL << 1)
+/* For GPU cycle counter */
+#define BASE_TIMEINFO_CYCLE_COUNTER_FLAG (1UL << 2)
+
+#define BASE_TIMEREQUEST_ALLOWED_FLAGS (\
+		BASE_TIMEINFO_MONOTONIC_FLAG | \
+		BASE_TIMEINFO_TIMESTAMP_FLAG | \
+		BASE_TIMEINFO_CYCLE_COUNTER_FLAG)
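
A small sketch of building a request mask from these flags and checking it
against the allowed set (the u32 type is assumed to be available here):

    /* Ask for the monotonic counter and the system-wide timestamp only. */
    u32 time_flags = BASE_TIMEINFO_MONOTONIC_FLAG | BASE_TIMEINFO_TIMESTAMP_FLAG;

    LOCAL_ASSERT((time_flags & ~BASE_TIMEREQUEST_ALLOWED_FLAGS) == 0);
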
+
 
 #endif				/* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
index dc0d5f1..1ab785e 100644
--- a/drivers/gpu/arm/midgard/mali_kbase.h
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -68,7 +68,7 @@
 #include "mali_kbase_jd_debugfs.h"
 #include "mali_kbase_gpuprops.h"
 #include "mali_kbase_jm.h"
-#include "mali_kbase_vinstr.h"
+#include "mali_kbase_ioctl.h"
 
 #include "ipa/mali_kbase_ipa.h"
 
@@ -82,6 +82,11 @@
 #define u64_to_user_ptr(x) ((void __user *)(uintptr_t)x)
 #endif
 
+
+/* Physical memory group ID for a special page which can alias several regions.
+ */
+#define KBASE_MEM_GROUP_SINK BASE_MEM_GROUP_DEFAULT
+
 /*
  * Kernel-side Base (KBase) APIs
  */
@@ -109,13 +114,10 @@
 struct kbase_device *kbase_find_device(int minor);
 void kbase_release_device(struct kbase_device *kbdev);
 
-void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
-
-
 /**
- * kbase_get_unmapped_area() - get an address range which is currently
- *                             unmapped.
- * @filp: File operations associated with kbase device.
+ * kbase_context_get_unmapped_area() - get an address range which is currently
+ *                                     unmapped.
+ * @kctx: A kernel base context (which has its own GPU address space).
  * @addr: CPU mapped address (set to 0 since MAP_FIXED mapping is not allowed
  *        as Mali GPU driver decides about the mapping).
  * @len: Length of the address range.
@@ -150,7 +152,7 @@
  * Return: if successful, address of the unmapped area aligned as required;
  *         error code (negative) in case of failure;
  */
-unsigned long kbase_get_unmapped_area(struct file *filp,
+unsigned long kbase_context_get_unmapped_area(struct kbase_context *kctx,
 		const unsigned long addr, const unsigned long len,
 		const unsigned long pgoff, const unsigned long flags);
 
@@ -203,6 +205,16 @@
 			 struct kbase_jd_atom *katom);
 void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
 
+/**
+ * kbase_job_done - Process completed jobs from job interrupt
+ * @kbdev: Pointer to the kbase device.
+ * @done: Bitmask of done or failed jobs, from JOB_IRQ_STAT register
+ *
+ * This function processes the completed, or failed, jobs from the GPU job
+ * slots, for the bits set in the @done bitmask.
+ *
+ * The hwaccess_lock must be held when calling this function.
+ */
 void kbase_job_done(struct kbase_device *kbdev, u32 done);
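
Since the new kernel-doc states that hwaccess_lock must be held, a hedged
sketch of the expected calling pattern ('done' is a placeholder here; in the
driver it comes from the JOB_IRQ_STAT register):

    unsigned long flags;
    u32 done = 0x3;  /* e.g. job slots 0 and 1 reported done or failed */

    spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
    kbase_job_done(kbdev, done);
    spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
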
 
 /**
@@ -251,6 +263,32 @@
 int kbasep_jit_alloc_validate(struct kbase_context *kctx,
 					struct base_jit_alloc_info *info);
 /**
+ * kbase_free_user_buffer() - Free memory allocated for struct
+ *		@kbase_debug_copy_buffer.
+ *
+ * @buffer:	Pointer to the memory location allocated for the object
+ *		of the type struct @kbase_debug_copy_buffer.
+ */
+static inline void kbase_free_user_buffer(
+		struct kbase_debug_copy_buffer *buffer)
+{
+	struct page **pages = buffer->extres_pages;
+	int nr_pages = buffer->nr_extres_pages;
+
+	if (pages) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *pg = pages[i];
+
+			if (pg)
+				put_page(pg);
+		}
+		kfree(pages);
+	}
+}
+
+/**
  * kbase_mem_copy_from_extres_page() - Copy pages from external resources.
  *
  * @kctx:		kbase context within which the copying is to take place.
@@ -261,8 +299,8 @@
  * @nr_pages:		Number of pages to copy.
  * @target_page_nr:	Number of target pages which will be used for copying.
  * @offset:		Offset into the target pages from which the copying
- *			is to be performed. 
- * @to_copy:		Size of the chunk to be copied, in bytes. 
+ *			is to be performed.
+ * @to_copy:		Size of the chunk to be copied, in bytes.
  */
 void kbase_mem_copy_from_extres_page(struct kbase_context *kctx,
 		void *extres_page, struct page **pages, unsigned int nr_pages,
@@ -290,8 +328,6 @@
 			    u64 event,
 			    unsigned char new_status);
 
-bool kbase_replay_process(struct kbase_jd_atom *katom);
-
 void kbasep_soft_job_timeout_worker(struct timer_list *timer);
 void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
 
@@ -329,25 +365,34 @@
  *
  * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
- * This takes into account the following
- *
- * - whether there is an active context reference
- *
- * - whether any of the shader cores or the tiler are needed
- *
- * It should generally be preferred against checking just
- * kbdev->pm.active_count on its own, because some code paths drop their
- * reference on this whilst still having the shader cores/tiler in use.
+ * This takes into account whether there is an active context reference.
  *
  * Return: true if the GPU is active, false otherwise
  */
 static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
 {
-	return (kbdev->pm.active_count > 0 || kbdev->shader_needed_cnt ||
-			kbdev->tiler_needed_cnt);
+	return kbdev->pm.active_count > 0;
 }
 
 /**
+ * kbase_pm_metrics_start - Start the utilization metrics timer
+ * @kbdev: Pointer to the kbase device for which to start the utilization
+ *         metrics calculation thread.
+ *
+ * Start the timer that drives the metrics calculation and runs the custom DVFS.
+ */
+void kbase_pm_metrics_start(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_metrics_stop - Stop the utilization metrics timer
+ * @kbdev: Pointer to the kbase device for which to stop the utilization
+ *         metrics calculation thread.
+ *
+ * Stop the timer that drives the metrics calculation and runs the custom DVFS.
+ */
+void kbase_pm_metrics_stop(struct kbase_device *kbdev);
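
A purely illustrative pairing of the two new calls, assuming some
platform-specific work that must not race the metrics timer:

    kbase_pm_metrics_stop(kbdev);
    /* ... platform-specific clock or DVFS reconfiguration ... */
    kbase_pm_metrics_start(kbdev);
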
+
+/**
  * Return the atom's ID, as was originally supplied by userspace in
  * base_jd_atom_v2::atom_number
  */
@@ -390,9 +435,8 @@
  * the counter during disjoint events we also increment the counter when jobs may be affected
  * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
  *
- * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
- * (as part of the replay workaround). Increasing the disjoint state also increases the count of
- * disjoint events.
+ * Disjoint state is entered during GPU reset. Increasing the disjoint state also increases
+ * the count of disjoint events.
  *
  * The disjoint state is then used to increase the count of disjoint events during job submission
  * and job completion. Any atom submitted or completed while the disjoint state is greater than
@@ -689,6 +733,3 @@
 
 
 #endif
-
-
-
diff --git a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
index 2e99a4d..6f638cc 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_as_fault_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -48,7 +48,7 @@
 
 			/* output the last page fault addr */
 			seq_printf(sfile, "%llu\n",
-				   (u64) kbdev->as[as_no].fault_addr);
+				   (u64) kbdev->as[as_no].pf_data.addr);
 		}
 
 	}
@@ -64,6 +64,7 @@
 }
 
 static const struct file_operations as_fault_fops = {
+	.owner = THIS_MODULE,
 	.open = kbase_as_fault_debugfs_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -87,7 +88,7 @@
 	kbdev->debugfs_as_read_bitmap = 0ULL;
 
 	KBASE_DEBUG_ASSERT(kbdev->nr_hw_address_spaces);
-	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].fault_addr) == sizeof(u64));
+	KBASE_DEBUG_ASSERT(sizeof(kbdev->as[0].pf_data.addr) == sizeof(u64));
 
 	debugfs_directory = debugfs_create_dir("address_spaces",
 					       kbdev->mali_debugfs_directory);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_bits.h b/drivers/gpu/arm/midgard/mali_kbase_bits.h
new file mode 100644
index 0000000..2c11093
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_bits.h
@@ -0,0 +1,41 @@
+/*
+ *
+ * (C) COPYRIGHT ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ *//* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ */
+
+#ifndef _KBASE_BITS_H_
+#define _KBASE_BITS_H_
+
+#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
+#include <linux/bits.h>
+#else
+#include <linux/bitops.h>
+#endif
+
+#endif /* _KBASE_BITS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
index e4575b6..447e059 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -109,48 +109,6 @@
 };
 
 /**
- * Default setting for read Address ID limiting on AXI bus.
- *
- * Attached value: u32 register value
- *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
- *    KBASE_AID_16 - use 16 IDs (4 ID bits)
- *    KBASE_AID_8  - use 8 IDs (3 ID bits)
- *    KBASE_AID_4  - use 4 IDs (2 ID bits)
- * Default value: KBASE_AID_32 (no limit). Note hardware implementation
- * may limit to a lower value.
- */
-#define DEFAULT_ARID_LIMIT KBASE_AID_32
-
-/**
- * Default setting for write Address ID limiting on AXI.
- *
- * Attached value: u32 register value
- *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
- *    KBASE_AID_16 - use 16 IDs (4 ID bits)
- *    KBASE_AID_8  - use 8 IDs (3 ID bits)
- *    KBASE_AID_4  - use 4 IDs (2 ID bits)
- * Default value: KBASE_AID_32 (no limit). Note hardware implementation
- * may limit to a lower value.
- */
-#define DEFAULT_AWID_LIMIT KBASE_AID_32
-
-/**
- * Default setting for read Address ID limiting on AXI bus.
- *
- * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
- * may limit to a lower value.
- */
-#define DEFAULT_3BIT_ARID_LIMIT KBASE_3BIT_AID_32
-
-/**
- * Default setting for write Address ID limiting on AXI.
- *
- * Default value: KBASE_3BIT_AID_32 (no limit). Note hardware implementation
- * may limit to a lower value.
- */
-#define DEFAULT_3BIT_AWID_LIMIT KBASE_3BIT_AID_32
-
-/**
  * Default period for DVFS sampling
  */
 #define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
@@ -171,11 +129,6 @@
 #define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
 
 /**
- * Power Manager number of ticks before GPU is powered off
- */
-#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
-
-/**
  * Default scheduling tick granuality
  */
 #define DEFAULT_JS_SCHEDULING_PERIOD_NS    (100000000u) /* 100ms */
@@ -253,22 +206,6 @@
 #define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
 
 /**
- * Perform GPU power down using only platform specific code, skipping DDK power
- * management.
- *
- * If this is non-zero then kbase will avoid powering down shader cores, the
- * tiler, and the L2 cache, instead just powering down the entire GPU through
- * platform specific code. This may be required for certain platform
- * integrations.
- *
- * Note that as this prevents kbase from powering down shader cores, this limits
- * the available power policies to coarse_demand and always_on.
- */
-#ifndef PLATFORM_POWER_DOWN_ONLY
-#define PLATFORM_POWER_DOWN_ONLY (0)
-#endif
-
-/**
  * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
  * this isn't available, so we simply define a dummy value here. If devfreq
  * is enabled the value will be read from there, otherwise this should be
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
index 970be89..e72ce70 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_context.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -31,15 +31,27 @@
 #include <mali_kbase_mem_linux.h>
 #include <mali_kbase_dma_fence.h>
 #include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_mem_pool_group.h>
+#include <mali_kbase_tracepoints.h>
 
 struct kbase_context *
-kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+kbase_create_context(struct kbase_device *kbdev, bool is_compat,
+	base_context_create_flags const flags,
+	unsigned long const api_version,
+	struct file *const filp)
 {
 	struct kbase_context *kctx;
 	int err;
 	struct page *p;
+	struct kbasep_js_kctx_info *js_kctx_info = NULL;
+	unsigned long irq_flags = 0;
 
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	if (WARN_ON(!kbdev))
+		goto out;
+
+	/* Validate flags */
+	if (WARN_ON(flags != (flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS)))
+		goto out;
 
 	/* zero-inited as lot of code assume it's zero'ed out on create */
 	kctx = vzalloc(sizeof(*kctx));
@@ -60,31 +72,19 @@
 		kbase_ctx_flag_set(kctx, KCTX_FORCE_SAME_VA);
 #endif /* !defined(CONFIG_64BIT) */
 
-	atomic_set(&kctx->setup_complete, 0);
-	atomic_set(&kctx->setup_in_progress, 0);
 	spin_lock_init(&kctx->mm_update_lock);
 	kctx->process_mm = NULL;
 	atomic_set(&kctx->nonmapped_pages, 0);
+	atomic_set(&kctx->permanent_mapped_pages, 0);
 	kctx->slots_pullable = 0;
 	kctx->tgid = current->tgid;
 	kctx->pid = current->pid;
 
-	err = kbase_mem_pool_init(&kctx->mem_pool,
-				  kbdev->mem_pool_max_size_default,
-				  KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
-				  kctx->kbdev,
-				  &kbdev->mem_pool);
+	err = kbase_mem_pool_group_init(&kctx->mem_pools, kbdev,
+		&kbdev->mem_pool_defaults, &kbdev->mem_pools);
 	if (err)
 		goto free_kctx;
 
-	err = kbase_mem_pool_init(&kctx->lp_mem_pool,
-				  (kbdev->mem_pool_max_size_default >> 9),
-				  KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
-				  kctx->kbdev,
-				  &kbdev->lp_mem_pool);
-	if (err)
-		goto free_mem_pool;
-
 	err = kbase_mem_evictable_init(kctx);
 	if (err)
 		goto free_both_pools;
@@ -103,7 +103,6 @@
 	if (err)
 		goto free_jd;
 
-
 	atomic_set(&kctx->drain_pending, 0);
 
 	mutex_init(&kctx->reg_lock);
@@ -117,11 +116,13 @@
 	if (err)
 		goto free_event;
 
-	err = kbase_mmu_init(kbdev, &kctx->mmu, kctx);
+	err = kbase_mmu_init(kbdev, &kctx->mmu, kctx,
+		base_context_mmu_group_id_get(flags));
 	if (err)
 		goto term_dma_fence;
 
-	p = kbase_mem_alloc_page(&kctx->mem_pool);
+	p = kbase_mem_alloc_page(
+		&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK]);
 	if (!p)
 		goto no_sink_page;
 	kctx->aliasing_sink_page = as_tagged(page_to_phys(p));
@@ -130,7 +131,6 @@
 
 	kctx->cookies = KBASE_COOKIE_MASK;
 
-
 	/* Make sure page 0 is not used... */
 	err = kbase_region_tracker_init(kctx);
 	if (err)
@@ -143,17 +143,39 @@
 	err = kbase_jit_init(kctx);
 	if (err)
 		goto no_jit;
+
+
 #ifdef CONFIG_GPU_TRACEPOINTS
 	atomic_set(&kctx->jctx.work_id, 0);
 #endif
 
 	kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
 
-	mutex_init(&kctx->vinstr_cli_lock);
+	mutex_init(&kctx->legacy_hwcnt_lock);
 
 	kbase_timer_setup(&kctx->soft_job_timeout,
 			  kbasep_soft_job_timeout_worker);
 
+	mutex_lock(&kbdev->kctx_list_lock);
+	list_add(&kctx->kctx_list_link, &kbdev->kctx_list);
+	KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, (u32)(kctx->tgid));
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	kctx->api_version = api_version;
+	kctx->filp = filp;
+
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+	/* Translate the flags */
+	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
 	return kctx;
 
 no_jit:
@@ -163,7 +185,8 @@
 no_sticky:
 	kbase_region_tracker_term(kctx);
 no_region_tracker:
-	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+	kbase_mem_pool_free(
+		&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK], p, false);
 no_sink_page:
 	kbase_mmu_term(kbdev, &kctx->mmu);
 term_dma_fence:
@@ -177,9 +200,7 @@
 deinit_evictable:
 	kbase_mem_evictable_deinit(kctx);
 free_both_pools:
-	kbase_mem_pool_term(&kctx->lp_mem_pool);
-free_mem_pool:
-	kbase_mem_pool_term(&kctx->mem_pool);
+	kbase_mem_pool_group_term(&kctx->mem_pools);
 free_kctx:
 	vfree(kctx);
 out:
@@ -204,10 +225,17 @@
 	unsigned long flags;
 	struct page *p;
 
-	KBASE_DEBUG_ASSERT(NULL != kctx);
+	if (WARN_ON(!kctx))
+		return;
 
 	kbdev = kctx->kbdev;
-	KBASE_DEBUG_ASSERT(NULL != kbdev);
+	if (WARN_ON(!kbdev))
+		return;
+
+	mutex_lock(&kbdev->kctx_list_lock);
+	KBASE_TLSTREAM_TL_DEL_CTX(kbdev, kctx);
+	list_del(&kctx->kctx_list_link);
+	mutex_unlock(&kbdev->kctx_list_lock);
 
 	KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
 
@@ -216,15 +244,26 @@
 	 * thread. */
 	kbase_pm_context_active(kbdev);
 
-	kbase_mem_pool_mark_dying(&kctx->mem_pool);
+	kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
 
 	kbase_jd_zap_context(kctx);
 
+	/* We have already waited for the jobs to complete (and hereafter there
+	 * can be no more submissions for the context). However, the wait could
+	 * have timed out and there could still be work items in flight that
+	 * would do the completion processing of jobs.
+	 * kbase_jd_exit() will destroy the 'job_done_wq'; destroying the wq
+	 * causes it to drain and implicitly wait for those work items to
+	 * complete.
+	 */
+	kbase_jd_exit(kctx);
+
 #ifdef CONFIG_DEBUG_FS
 	/* Removing the rest of the debugfs entries here as we want to keep the
 	 * atom debugfs interface alive until all atoms have completed. This
 	 * is useful for debugging hung contexts. */
 	debugfs_remove_recursive(kctx->kctx_dentry);
+	kbase_debug_job_fault_context_term(kctx);
 #endif
 
 	kbase_event_cleanup(kctx);
@@ -244,7 +283,8 @@
 
 	/* drop the aliasing sink page now that it can't be mapped anymore */
 	p = as_page(kctx->aliasing_sink_page);
-	kbase_mem_pool_free(&kctx->mem_pool, p, false);
+	kbase_mem_pool_free(&kctx->mem_pools.small[KBASE_MEM_GROUP_SINK],
+		p, false);
 
 	/* free pending region setups */
 	pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
@@ -262,12 +302,9 @@
 	kbase_region_tracker_term(kctx);
 	kbase_gpu_vm_unlock(kctx);
 
-
 	/* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
 	kbasep_js_kctx_term(kctx);
 
-	kbase_jd_exit(kctx);
-
 	kbase_dma_fence_term(kctx);
 
 	mutex_lock(&kbdev->mmu_hw_mutex);
@@ -283,8 +320,9 @@
 		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
 
 	kbase_mem_evictable_deinit(kctx);
-	kbase_mem_pool_term(&kctx->mem_pool);
-	kbase_mem_pool_term(&kctx->lp_mem_pool);
+
+	kbase_mem_pool_group_term(&kctx->mem_pools);
+
 	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
 
 	vfree(kctx);
@@ -292,36 +330,3 @@
 	kbase_pm_context_idle(kbdev);
 }
 KBASE_EXPORT_SYMBOL(kbase_destroy_context);
-
-int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
-{
-	int err = 0;
-	struct kbasep_js_kctx_info *js_kctx_info;
-	unsigned long irq_flags;
-
-	KBASE_DEBUG_ASSERT(NULL != kctx);
-
-	js_kctx_info = &kctx->jctx.sched_info;
-
-	/* Validate flags */
-	if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
-	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
-
-	/* Translate the flags */
-	if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
-		kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
-
-	/* Latch the initial attributes into the Job Scheduler */
-	kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
-
-	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
-	mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
- out:
-	return err;
-}
-KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
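
With kbase_context_set_create_flags() removed, the creation flags, the API
version and the backing struct file are now passed directly to
kbase_create_context(). A minimal caller sketch of the updated interface,
assuming the kbase internals added elsewhere in this patch (the real in-tree
caller is kbase_file_create_kctx() in mali_kbase_core_linux.c; the helper
name below is illustrative and error handling is trimmed):

/* Sketch only: assumes mali_kbase.h for struct kbase_file,
 * base_context_create_flags and kbase_create_context().
 */
static int example_create_kctx(struct kbase_file *kfile,
		base_context_create_flags flags)
{
	struct kbase_context *kctx;

	/* The context now records the API version and the struct file at
	 * creation time, so callers no longer patch them in afterwards.
	 */
	kctx = kbase_create_context(kfile->kbdev, in_compat_syscall(),
			flags, kfile->api_version, kfile->filp);
	if (!kctx)
		return -ENOMEM;

	kfile->kctx = kctx;
	return 0;
}
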
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.h b/drivers/gpu/arm/midgard/mali_kbase_context.h
index 30b0f64..5037b4e 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_context.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2017, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,36 +27,38 @@
 
 /**
  * kbase_create_context() - Create a kernel base context.
- * @kbdev: Kbase device
- * @is_compat: Force creation of a 32-bit context
  *
- * Allocate and init a kernel base context.
+ * @kbdev:       Object representing an instance of the GPU platform device,
+ *               allocated from the probe method of the Mali driver.
+ * @is_compat:   Force creation of a 32-bit context
+ * @flags:       Flags to set, which shall be any combination of
+ *               BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
+ * @api_version: Application program interface version, as encoded in
+ *               a single integer by the KBASE_API_VERSION macro.
+ * @filp:        Pointer to the struct file corresponding to device file
+ *               /dev/malixx instance, passed to the file's open method.
  *
- * Return: new kbase context
+ * Up to one context can be created for each client that opens the device file
+ * /dev/malixx. Context creation is deferred until a special ioctl() system call
+ * is made on the device file. Each context has its own GPU address space.
+ *
+ * Return: new kbase context or NULL on failure
  */
 struct kbase_context *
-kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+kbase_create_context(struct kbase_device *kbdev, bool is_compat,
+	base_context_create_flags const flags,
+	unsigned long api_version,
+	struct file *filp);
 
 /**
  * kbase_destroy_context - Destroy a kernel base context.
  * @kctx: Context to destroy
  *
- * Calls kbase_destroy_os_context() to free OS specific structures.
  * Will release all outstanding regions.
  */
 void kbase_destroy_context(struct kbase_context *kctx);
 
 /**
- * kbase_context_set_create_flags - Set creation flags on a context
- * @kctx: Kbase context
- * @flags: Flags to set, which shall be one of the flags of
- *         BASE_CONTEXT_CREATE_KERNEL_FLAGS.
- *
- * Return: 0 on success, -EINVAL otherwise when an invalid flag is specified.
- */
-int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
-
-/**
  * kbase_ctx_flag - Check if @flag is set on @kctx
  * @kctx: Pointer to kbase context to check
  * @flag: Flag to check
@@ -107,7 +109,7 @@
 /**
  * kbase_ctx_flag_set - Set @flag on @kctx
  * @kctx: Pointer to kbase context
- * @flag: Flag to clear
+ * @flag: Flag to set
  *
  * Set the @flag on @kctx. This is done atomically, so other flags being
  * cleared or set at the same time will be safe.
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
index ccb2932..57acbf9 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -40,17 +40,24 @@
 #include "mali_kbase_debug_mem_view.h"
 #include "mali_kbase_mem.h"
 #include "mali_kbase_mem_pool_debugfs.h"
+#include "mali_kbase_debugfs_helper.h"
 #if !MALI_CUSTOMER_RELEASE
 #include "mali_kbase_regs_dump_debugfs.h"
 #endif /* !MALI_CUSTOMER_RELEASE */
 #include "mali_kbase_regs_history_debugfs.h"
 #include <mali_kbase_hwaccess_backend.h>
+#include <mali_kbase_hwaccess_time.h>
 #include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_reset_gpu.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include "mali_kbase_ioctl.h"
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_legacy.h"
+#include "mali_kbase_vinstr.h"
 
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 #include "mali_kbase_gwt.h"
 #endif
 
@@ -77,6 +84,7 @@
 #include <mali_kbase_sync.h>
 #endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/log2.h>
 
@@ -89,7 +97,9 @@
 #include <linux/opp.h>
 #endif
 
-#include <mali_kbase_tlstream.h>
+#include <linux/pm_runtime.h>
+
+#include <mali_kbase_timeline.h>
 
 #include <mali_kbase_as_fault_debugfs.h>
 
@@ -105,9 +115,183 @@
 
 #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
 
-static int kbase_api_handshake(struct kbase_context *kctx,
+/**
+ * kbase_file_new - Create an object representing a device file
+ *
+ * @kbdev:  An instance of the GPU platform device, allocated from the probe
+ *          method of the driver.
+ * @filp:   Pointer to the struct file corresponding to device file
+ *          /dev/malixx instance, passed to the file's open method.
+ *
+ * In its initial state, the device file has no context (i.e. no GPU
+ * address space) and no API version number. Both must be assigned before
+ * kbase_file_get_kctx_if_setup_complete() can be used successfully.
+ *
+ * Return: Address of an object representing a simulated device file, or NULL
+ *         on failure.
+ */
+static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev,
+	struct file *const filp)
+{
+	struct kbase_file *const kfile = kmalloc(sizeof(*kfile), GFP_KERNEL);
+
+	if (kfile) {
+		kfile->kbdev = kbdev;
+		kfile->filp = filp;
+		kfile->kctx = NULL;
+		kfile->api_version = 0;
+		atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN);
+	}
+	return kfile;
+}
+
+/**
+ * kbase_file_set_api_version - Set the application programmer interface version
+ *
+ * @kfile:  A device file created by kbase_file_new()
+ * @major:  Major version number (must not exceed 12 bits)
+ * @minor:  Minor version number (must not exceed 12 bits)
+ *
+ * An application programmer interface (API) version must be specified
+ * before calling kbase_file_create_kctx(), otherwise an error is returned.
+ *
+ * If a version number was already set for the given @kfile (or is in the
+ * process of being set by another thread) then an error is returned.
+ *
+ * Return: 0 if successful, otherwise a negative error code.
+ */
+static int kbase_file_set_api_version(struct kbase_file *const kfile,
+	u16 const major, u16 const minor)
+{
+	if (WARN_ON(!kfile))
+		return -EINVAL;
+
+	/* Setup is pending: try to signal that we'll do the setup.
+	 * If setup was already in progress, fail this call.
+	 */
+	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN,
+		KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN)
+		return -EPERM;
+
+	/* save the proposed version number for later use */
+	kfile->api_version = KBASE_API_VERSION(major, minor);
+
+	atomic_set(&kfile->setup_state, KBASE_FILE_NEED_CTX);
+	return 0;
+}
+
+/**
+ * kbase_file_get_api_version - Get the application programmer interface version
+ *
+ * @kfile:  A device file created by kbase_file_new()
+ *
+ * Return: The version number (encoded with KBASE_API_VERSION) or 0 if none has
+ *         been set.
+ */
+static unsigned long kbase_file_get_api_version(struct kbase_file *const kfile)
+{
+	if (WARN_ON(!kfile))
+		return 0;
+
+	if (atomic_read(&kfile->setup_state) < KBASE_FILE_NEED_CTX)
+		return 0;
+
+	return kfile->api_version;
+}
+
+/**
+ * kbase_file_create_kctx - Create a kernel base context
+ *
+ * @kfile:  A device file created by kbase_file_new()
+ * @flags:  Flags to set, which can be any combination of
+ *          BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
+ *
+ * This creates a new context for the GPU platform device instance that was
+ * specified when kbase_file_new() was called. Each context has its own GPU
+ * address space. If a context was already created for the given @kfile (or is
+ * in the process of being created for it by another thread) then an error is
+ * returned.
+ *
+ * An API version number must have been set by kbase_file_set_api_version()
+ * before calling this function, otherwise an error is returned.
+ *
+ * Return: 0 if a new context was created, otherwise a negative error code.
+ */
+static int kbase_file_create_kctx(struct kbase_file *kfile,
+	base_context_create_flags flags);
+
+/**
+ * kbase_file_get_kctx_if_setup_complete - Get a kernel base context
+ *                                         pointer from a device file
+ *
+ * @kfile: A device file created by kbase_file_new()
+ *
+ * This function returns NULL if no context has been created for the given
+ * @kfile. This makes it safe to use in circumstances where the order of
+ * initialization cannot be enforced, but only if the caller checks the
+ * return value.
+ *
+ * Return: Address of the kernel base context associated with the @kfile, or
+ *         NULL if no context exists.
+ */
+static struct kbase_context *kbase_file_get_kctx_if_setup_complete(
+	struct kbase_file *const kfile)
+{
+	if (WARN_ON(!kfile) ||
+		atomic_read(&kfile->setup_state) != KBASE_FILE_COMPLETE ||
+		WARN_ON(!kfile->kctx))
+		return NULL;
+
+	return kfile->kctx;
+}
+
+/**
+ * kbase_file_delete - Destroy an object representing a device file
+ *
+ * @kfile: A device file created by kbase_file_new()
+ *
+ * If any context was created for the @kfile then it is destroyed.
+ */
+static void kbase_file_delete(struct kbase_file *const kfile)
+{
+	struct kbase_device *kbdev = NULL;
+
+	if (WARN_ON(!kfile))
+		return;
+
+	kfile->filp->private_data = NULL;
+	kbdev = kfile->kbdev;
+
+	if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) {
+		struct kbase_context *kctx = kfile->kctx;
+
+#ifdef CONFIG_DEBUG_FS
+		kbasep_mem_profile_debugfs_remove(kctx);
+#endif
+
+		mutex_lock(&kctx->legacy_hwcnt_lock);
+		/* If this client was performing hardware counter dumping and
+		 * did not explicitly detach itself, destroy it now
+		 */
+		kbase_hwcnt_legacy_client_destroy(kctx->legacy_hwcnt_cli);
+		kctx->legacy_hwcnt_cli = NULL;
+		mutex_unlock(&kctx->legacy_hwcnt_lock);
+
+		kbase_destroy_context(kctx);
+
+		dev_dbg(kbdev->dev, "deleted base context\n");
+	}
+
+	kbase_release_device(kbdev);
+
+	kfree(kfile);
+}
+
+static int kbase_api_handshake(struct kbase_file *kfile,
 			       struct kbase_ioctl_version_check *version)
 {
+	int err = 0;
+
 	switch (version->major) {
 	case BASE_UK_VERSION_MAJOR:
 		/* set minor to be the lowest common */
@@ -126,9 +310,19 @@
 	}
 
 	/* save the proposed version number for later use */
-	kctx->api_version = KBASE_API_VERSION(version->major, version->minor);
+	err = kbase_file_set_api_version(kfile, version->major, version->minor);
+	if (unlikely(err))
+		return err;
 
-	return 0;
+	/* For backward compatibility, we may need to create the context before
+	 * the flags have been set. Originally it was created on file open
+	 * (with job submission disabled) but we don't support that usage.
+	 */
+	if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15))
+		err = kbase_file_create_kctx(kfile,
+			BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED);
+
+	return err;
 }
 
 /**
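
Because a freshly opened kbase_file has neither an API version nor a context,
user space must now complete this handshake before any other ioctl, mmap() or
read() on the file descriptor will succeed. A user-space sketch of the
sequence, assuming the uapi structures and ioctl numbers from
mali_kbase_ioctl.h; the helper name, device node path and version values are
illustrative:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"	/* uapi header shipped with the driver */

/* Drives the setup state machine (KBASE_FILE_NEED_VSN -> NEED_CTX ->
 * COMPLETE). Clients reporting 11.15 or newer get their context from
 * SET_FLAGS; older clients get it during VERSION_CHECK with submission
 * disabled.
 */
static int example_open_mali(void)
{
	struct kbase_ioctl_version_check vc = { .major = 11, .minor = 15 };
	struct kbase_ioctl_set_flags sf = { .create_flags = 0 };
	int fd = open("/dev/mali0", O_RDWR | O_CLOEXEC);

	if (fd < 0)
		return -1;

	if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &vc) < 0 ||
	    ioctl(fd, KBASE_IOCTL_SET_FLAGS, &sf) < 0) {
		close(fd);
		return -1;
	}

	/* Until setup completes, kbase_ioctl(), kbase_mmap() and kbase_read()
	 * fail with -EPERM via kbase_file_get_kctx_if_setup_complete().
	 */
	return fd;
}
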
@@ -161,22 +355,25 @@
 #endif /* CONFIG_MALI_DEVFREQ */
 	inited_tlstream = (1u << 4),
 	inited_backend_early = (1u << 5),
-	inited_backend_late = (1u << 6),
-	inited_device = (1u << 7),
-	inited_vinstr = (1u << 8),
-	inited_job_fault = (1u << 10),
-	inited_sysfs_group = (1u << 11),
-	inited_misc_register = (1u << 12),
-	inited_get_device = (1u << 13),
-	inited_dev_list = (1u << 14),
-	inited_debugfs = (1u << 15),
-	inited_gpu_device = (1u << 16),
-	inited_registers_map = (1u << 17),
-	inited_io_history = (1u << 18),
-	inited_power_control = (1u << 19),
-	inited_buslogger = (1u << 20),
-	inited_protected = (1u << 21),
-	inited_ctx_sched = (1u << 22)
+	inited_hwcnt_gpu_iface = (1u << 6),
+	inited_hwcnt_gpu_ctx = (1u << 7),
+	inited_hwcnt_gpu_virt = (1u << 8),
+	inited_vinstr = (1u << 9),
+	inited_backend_late = (1u << 10),
+	inited_device = (1u << 11),
+	inited_job_fault = (1u << 13),
+	inited_sysfs_group = (1u << 14),
+	inited_misc_register = (1u << 15),
+	inited_get_device = (1u << 16),
+	inited_dev_list = (1u << 17),
+	inited_debugfs = (1u << 18),
+	inited_gpu_device = (1u << 19),
+	inited_registers_map = (1u << 20),
+	inited_io_history = (1u << 21),
+	inited_power_control = (1u << 22),
+	inited_buslogger = (1u << 23),
+	inited_protected = (1u << 24),
+	inited_ctx_sched = (1u << 25)
 };
 
 static struct kbase_device *to_kbase_device(struct device *dev)
@@ -204,11 +401,11 @@
 		}
 
 #ifdef CONFIG_OF
-		if (!strncasecmp(irq_res->name, "job", 4)) {
+		if (!strncmp(irq_res->name, "JOB", 4)) {
 			irqtag = JOB_IRQ_TAG;
-		} else if (!strncasecmp(irq_res->name, "mmu", 4)) {
+		} else if (!strncmp(irq_res->name, "MMU", 4)) {
 			irqtag = MMU_IRQ_TAG;
-		} else if (!strncasecmp(irq_res->name, "gpu", 4)) {
+		} else if (!strncmp(irq_res->name, "GPU", 4)) {
 			irqtag = GPU_IRQ_TAG;
 		} else {
 			dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
@@ -272,6 +469,7 @@
 }
 EXPORT_SYMBOL(kbase_release_device);
 
+#ifdef CONFIG_DEBUG_FS
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && \
 		!(LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 28) && \
 		LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0))
@@ -326,6 +524,7 @@
 }
 
 static const struct file_operations kbase_infinite_cache_fops = {
+	.owner = THIS_MODULE,
 	.open = simple_open,
 	.write = write_ctx_infinite_cache,
 	.read = read_ctx_infinite_cache,
@@ -375,48 +574,49 @@
 }
 
 static const struct file_operations kbase_force_same_va_fops = {
+	.owner = THIS_MODULE,
 	.open = simple_open,
 	.write = write_ctx_force_same_va,
 	.read = read_ctx_force_same_va,
 };
+#endif /* CONFIG_DEBUG_FS */
 
-static int kbase_open(struct inode *inode, struct file *filp)
+static int kbase_file_create_kctx(struct kbase_file *const kfile,
+	base_context_create_flags const flags)
 {
 	struct kbase_device *kbdev = NULL;
-	struct kbase_context *kctx;
-	int ret = 0;
+	struct kbase_context *kctx = NULL;
 #ifdef CONFIG_DEBUG_FS
 	char kctx_name[64];
 #endif
 
-	kbdev = kbase_find_device(iminor(inode));
+	if (WARN_ON(!kfile))
+		return -EINVAL;
 
-	if (!kbdev)
-		return -ENODEV;
+	/* Setup is pending: try to signal that we'll do the setup.
+	 * If setup was already in progress, fail this call.
+	 */
+	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_CTX,
+		KBASE_FILE_CTX_IN_PROGRESS) != KBASE_FILE_NEED_CTX)
+		return -EPERM;
+
+	kbdev = kfile->kbdev;
 
 #if (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE)
-	kctx = kbase_create_context(kbdev, in_compat_syscall());
+	kctx = kbase_create_context(kbdev, in_compat_syscall(),
+		flags, kfile->api_version, kfile->filp);
 #else
-	kctx = kbase_create_context(kbdev, is_compat_task());
+	kctx = kbase_create_context(kbdev, is_compat_task(),
+		flags, kfile->api_version, kfile->filp);
 #endif /* (KERNEL_VERSION(4, 6, 0) <= LINUX_VERSION_CODE) */
-	if (!kctx) {
-		ret = -ENOMEM;
-		goto out;
-	}
 
-	init_waitqueue_head(&kctx->event_queue);
-	filp->private_data = kctx;
-	kctx->filp = filp;
+	/* if context creation fails, the file stays stuck in setup mode */
+	if (!kctx)
+		return -ENOMEM;
 
 	if (kbdev->infinite_cache_active_default)
 		kbase_ctx_flag_set(kctx, KCTX_INFINITE_CACHE);
 
-	/*
-	 * Allow large offsets as per commit be83bbf80682 ("mmap:
-	 * introduce sane default mmap limits")
-	 */
-	filp->f_mode |= FMODE_UNSIGNED_OFFSET;
-
 #ifdef CONFIG_DEBUG_FS
 	snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
 
@@ -424,47 +624,56 @@
 			kbdev->debugfs_ctx_directory);
 
 	if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
-		ret = -ENOMEM;
-		goto out;
+		/* we don't treat this as a fail - just warn about it */
+		dev_warn(kbdev->dev, "couldn't create debugfs dir for kctx\n");
+	} else {
+		debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
+				kctx, &kbase_infinite_cache_fops);
+		debugfs_create_file("force_same_va", 0600,
+				kctx->kctx_dentry, kctx,
+				&kbase_force_same_va_fops);
+
+		mutex_init(&kctx->mem_profile_lock);
+
+		kbasep_jd_debugfs_ctx_init(kctx);
+		kbase_debug_mem_view_init(kctx);
+
+		kbase_debug_job_fault_context_init(kctx);
+
+		kbase_mem_pool_debugfs_init(kctx->kctx_dentry, kctx);
+
+		kbase_jit_debugfs_init(kctx);
 	}
-
-	debugfs_create_file("infinite_cache", 0644, kctx->kctx_dentry,
-			kctx, &kbase_infinite_cache_fops);
-	debugfs_create_file("force_same_va", S_IRUSR | S_IWUSR,
-			kctx->kctx_dentry, kctx, &kbase_force_same_va_fops);
-
-	mutex_init(&kctx->mem_profile_lock);
-
-	kbasep_jd_debugfs_ctx_init(kctx);
-	kbase_debug_mem_view_init(filp);
-
-	kbase_debug_job_fault_context_init(kctx);
-
-	kbase_mem_pool_debugfs_init(kctx->kctx_dentry, &kctx->mem_pool, &kctx->lp_mem_pool);
-
-	kbase_jit_debugfs_init(kctx);
 #endif /* CONFIG_DEBUG_FS */
 
 	dev_dbg(kbdev->dev, "created base context\n");
 
-	{
-		struct kbasep_kctx_list_element *element;
+	kfile->kctx = kctx;
+	atomic_set(&kfile->setup_state, KBASE_FILE_COMPLETE);
 
-		element = kzalloc(sizeof(*element), GFP_KERNEL);
-		if (element) {
-			mutex_lock(&kbdev->kctx_list_lock);
-			element->kctx = kctx;
-			list_add(&element->link, &kbdev->kctx_list);
-			KBASE_TLSTREAM_TL_NEW_CTX(
-					element->kctx,
-					element->kctx->id,
-					(u32)(element->kctx->tgid));
-			mutex_unlock(&kbdev->kctx_list_lock);
-		} else {
-			/* we don't treat this as a fail - just warn about it */
-			dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
-		}
+	return 0;
+}
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+	struct kbase_device *kbdev = NULL;
+	struct kbase_file *kfile;
+	int ret = 0;
+
+	kbdev = kbase_find_device(iminor(inode));
+
+	if (!kbdev)
+		return -ENODEV;
+
+	kfile = kbase_file_new(kbdev, filp);
+	if (!kfile) {
+		ret = -ENOMEM;
+		goto out;
 	}
+
+	filp->private_data = kfile;
+	filp->f_mode |= FMODE_UNSIGNED_OFFSET;
+
 	return 0;
 
  out:
@@ -474,69 +683,54 @@
 
 static int kbase_release(struct inode *inode, struct file *filp)
 {
-	struct kbase_context *kctx = filp->private_data;
-	struct kbase_device *kbdev = kctx->kbdev;
-	struct kbasep_kctx_list_element *element, *tmp;
-	bool found_element = false;
+	struct kbase_file *const kfile = filp->private_data;
 
-	KBASE_TLSTREAM_TL_DEL_CTX(kctx);
-
-#ifdef CONFIG_DEBUG_FS
-	kbasep_mem_profile_debugfs_remove(kctx);
-	kbase_debug_job_fault_context_term(kctx);
-#endif
-
-	mutex_lock(&kbdev->kctx_list_lock);
-	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
-		if (element->kctx == kctx) {
-			list_del(&element->link);
-			kfree(element);
-			found_element = true;
-		}
-	}
-	mutex_unlock(&kbdev->kctx_list_lock);
-	if (!found_element)
-		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
-
-	filp->private_data = NULL;
-
-	mutex_lock(&kctx->vinstr_cli_lock);
-	/* If this client was performing hwcnt dumping and did not explicitly
-	 * detach itself, remove it from the vinstr core now */
-	if (kctx->vinstr_cli) {
-		struct kbase_ioctl_hwcnt_enable enable;
-
-		enable.dump_buffer = 0llu;
-		kbase_vinstr_legacy_hwc_setup(
-				kbdev->vinstr_ctx, &kctx->vinstr_cli, &enable);
-	}
-	mutex_unlock(&kctx->vinstr_cli_lock);
-
-	kbase_destroy_context(kctx);
-
-	dev_dbg(kbdev->dev, "deleted base context\n");
-	kbase_release_device(kbdev);
+	kbase_file_delete(kfile);
 	return 0;
 }
 
-static int kbase_api_set_flags(struct kbase_context *kctx,
+static int kbase_api_set_flags(struct kbase_file *kfile,
 		struct kbase_ioctl_set_flags *flags)
 {
-	int err;
+	int err = 0;
+	unsigned long const api_version = kbase_file_get_api_version(kfile);
+	struct kbase_context *kctx = NULL;
 
-	/* setup pending, try to signal that we'll do the setup,
-	 * if setup was already in progress, err this call
-	 */
-	if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1) != 0)
+	/* Validate flags */
+	if (flags->create_flags !=
+		(flags->create_flags & BASEP_CONTEXT_CREATE_KERNEL_FLAGS))
 		return -EINVAL;
 
-	err = kbase_context_set_create_flags(kctx, flags->create_flags);
-	/* if bad flags, will stay stuck in setup mode */
-	if (err)
-		return err;
+	/* For backward compatibility, the context may have been created before
+	 * the flags were set.
+	 */
+	if (api_version >= KBASE_API_VERSION(11, 15)) {
+		err = kbase_file_create_kctx(kfile, flags->create_flags);
+	} else {
+		struct kbasep_js_kctx_info *js_kctx_info = NULL;
+		unsigned long irq_flags = 0;
 
-	atomic_set(&kctx->setup_complete, 1);
-	return 0;
+		/* If setup is incomplete (e.g. because the API version
+		 * wasn't set) then we have to give up.
+		 */
+		kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+		if (unlikely(!kctx))
+			return -EPERM;
+
+		js_kctx_info = &kctx->jctx.sched_info;
+		mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+		spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, irq_flags);
+
+		/* Translate the flags */
+		if ((flags->create_flags &
+			BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+			kbase_ctx_flag_clear(kctx, KCTX_SUBMIT_DISABLED);
+
+		spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, irq_flags);
+		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+	}
+
+	return err;
 }
 
 static int kbase_api_job_submit(struct kbase_context *kctx,
@@ -595,13 +789,18 @@
 	}
 	rcu_read_unlock();
 
-	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
 		return -ENOMEM;
 
+	/* Force SAME_VA if a 64-bit client.
+	 * The only exception is GPU-executable memory if an EXEC_VA zone
+	 * has been initialized. In that case, GPU-executable memory may
+	 * or may not be SAME_VA.
+	 */
 	if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
 			kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
-		/* force SAME_VA if a 64-bit client */
-		flags |= BASE_MEM_SAME_VA;
+		if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx))
+			flags |= BASE_MEM_SAME_VA;
 	}
 
 
@@ -635,13 +834,7 @@
 static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx,
 		struct kbase_ioctl_hwcnt_reader_setup *setup)
 {
-	int ret;
-
-	mutex_lock(&kctx->vinstr_cli_lock);
-	ret = kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
-	mutex_unlock(&kctx->vinstr_cli_lock);
-
-	return ret;
+	return kbase_vinstr_hwcnt_reader_setup(kctx->kbdev->vinstr_ctx, setup);
 }
 
 static int kbase_api_hwcnt_enable(struct kbase_context *kctx,
@@ -649,10 +842,31 @@
 {
 	int ret;
 
-	mutex_lock(&kctx->vinstr_cli_lock);
-	ret = kbase_vinstr_legacy_hwc_setup(kctx->kbdev->vinstr_ctx,
-			&kctx->vinstr_cli, enable);
-	mutex_unlock(&kctx->vinstr_cli_lock);
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	if (enable->dump_buffer != 0) {
+		/* Non-zero dump buffer, so user wants to create the client */
+		if (kctx->legacy_hwcnt_cli == NULL) {
+			ret = kbase_hwcnt_legacy_client_create(
+				kctx->kbdev->hwcnt_gpu_virt,
+				enable,
+				&kctx->legacy_hwcnt_cli);
+		} else {
+			/* This context already has a client */
+			ret = -EBUSY;
+		}
+	} else {
+		/* Zero dump buffer, so user wants to destroy the client */
+		if (kctx->legacy_hwcnt_cli != NULL) {
+			kbase_hwcnt_legacy_client_destroy(
+				kctx->legacy_hwcnt_cli);
+			kctx->legacy_hwcnt_cli = NULL;
+			ret = 0;
+		} else {
+			/* This context has no client to destroy */
+			ret = -EINVAL;
+		}
+	}
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
 
 	return ret;
 }
@@ -661,10 +875,9 @@
 {
 	int ret;
 
-	mutex_lock(&kctx->vinstr_cli_lock);
-	ret = kbase_vinstr_hwc_dump(kctx->vinstr_cli,
-			BASE_HWCNT_READER_EVENT_MANUAL);
-	mutex_unlock(&kctx->vinstr_cli_lock);
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	ret = kbase_hwcnt_legacy_client_dump(kctx->legacy_hwcnt_cli);
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
 
 	return ret;
 }
@@ -673,13 +886,44 @@
 {
 	int ret;
 
-	mutex_lock(&kctx->vinstr_cli_lock);
-	ret = kbase_vinstr_hwc_clear(kctx->vinstr_cli);
-	mutex_unlock(&kctx->vinstr_cli_lock);
+	mutex_lock(&kctx->legacy_hwcnt_lock);
+	ret = kbase_hwcnt_legacy_client_clear(kctx->legacy_hwcnt_cli);
+	mutex_unlock(&kctx->legacy_hwcnt_lock);
 
 	return ret;
 }
 
+static int kbase_api_get_cpu_gpu_timeinfo(struct kbase_context *kctx,
+		union kbase_ioctl_get_cpu_gpu_timeinfo *timeinfo)
+{
+	u32 flags = timeinfo->in.request_flags;
+	struct timespec ts;
+	u64 timestamp;
+	u64 cycle_cnt;
+
+	kbase_pm_context_active(kctx->kbdev);
+
+	kbase_backend_get_gpu_time(kctx->kbdev,
+		(flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG) ? &cycle_cnt : NULL,
+		(flags & BASE_TIMEINFO_TIMESTAMP_FLAG) ? &timestamp : NULL,
+		(flags & BASE_TIMEINFO_MONOTONIC_FLAG) ? &ts : NULL);
+
+	if (flags & BASE_TIMEINFO_TIMESTAMP_FLAG)
+		timeinfo->out.timestamp = timestamp;
+
+	if (flags & BASE_TIMEINFO_CYCLE_COUNTER_FLAG)
+		timeinfo->out.cycle_counter = cycle_cnt;
+
+	if (flags & BASE_TIMEINFO_MONOTONIC_FLAG) {
+		timeinfo->out.sec = ts.tv_sec;
+		timeinfo->out.nsec = ts.tv_nsec;
+	}
+
+	kbase_pm_context_idle(kctx->kbdev);
+
+	return 0;
+}
+
 #ifdef CONFIG_MALI_NO_MALI
 static int kbase_api_hwcnt_set(struct kbase_context *kctx,
 		struct kbase_ioctl_hwcnt_values *values)
@@ -733,7 +977,7 @@
 
 	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
 			DEFAULT_MAX_JIT_ALLOCATIONS,
-			JIT_LEGACY_TRIM_LEVEL);
+			JIT_LEGACY_TRIM_LEVEL, BASE_MEM_GROUP_DEFAULT);
 }
 
 static int kbase_api_mem_jit_init(struct kbase_context *kctx,
@@ -752,7 +996,14 @@
 	}
 
 	return kbase_region_tracker_init_jit(kctx, jit_init->va_pages,
-			jit_init->max_allocations, jit_init->trim_level);
+			jit_init->max_allocations, jit_init->trim_level,
+			jit_init->group_id);
+}
+
+static int kbase_api_mem_exec_init(struct kbase_context *kctx,
+		struct kbase_ioctl_mem_exec_init *exec_init)
+{
+	return kbase_region_tracker_init_exec(kctx, exec_init->va_pages);
 }
 
 static int kbase_api_mem_sync(struct kbase_context *kctx,
@@ -800,12 +1051,12 @@
 static int kbase_api_tlstream_acquire(struct kbase_context *kctx,
 		struct kbase_ioctl_tlstream_acquire *acquire)
 {
-	return kbase_tlstream_acquire(kctx, acquire->flags);
+	return kbase_timeline_io_acquire(kctx->kbdev, acquire->flags);
 }
 
 static int kbase_api_tlstream_flush(struct kbase_context *kctx)
 {
-	kbase_tlstream_flush_streams();
+	kbase_timeline_streams_flush(kctx->kbdev->timeline);
 
 	return 0;
 }
@@ -842,7 +1093,7 @@
 	}
 
 	flags = alias->in.flags;
-	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY) {
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY) {
 		vfree(ai);
 		return -EINVAL;
 	}
@@ -867,7 +1118,7 @@
 	int ret;
 	u64 flags = import->in.flags;
 
-	if (flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+	if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
 		return -ENOMEM;
 
 	ret = kbase_mem_import(kctx,
@@ -886,7 +1137,7 @@
 static int kbase_api_mem_flags_change(struct kbase_context *kctx,
 		struct kbase_ioctl_mem_flags_change *change)
 {
-	if (change->flags & BASE_MEM_FLAGS_KERNEL_ONLY)
+	if (change->flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
 		return -ENOMEM;
 
 	return kbase_mem_flags_change(kctx, change->gpu_va,
@@ -925,24 +1176,6 @@
 #endif
 }
 
-static int kbase_api_get_profiling_controls(struct kbase_context *kctx,
-		struct kbase_ioctl_get_profiling_controls *controls)
-{
-	int ret;
-
-	if (controls->count > (FBDUMP_CONTROL_MAX - FBDUMP_CONTROL_MIN))
-		return -EINVAL;
-
-	ret = copy_to_user(u64_to_user_ptr(controls->buffer),
-			&kctx->kbdev->kbase_profiling_controls[
-				FBDUMP_CONTROL_MIN],
-			controls->count * sizeof(u32));
-
-	if (ret)
-		return -EFAULT;
-	return 0;
-}
-
 static int kbase_api_mem_profile_add(struct kbase_context *kctx,
 		struct kbase_ioctl_mem_profile_add *data)
 {
@@ -1049,7 +1282,8 @@
 static int kbase_api_tlstream_test(struct kbase_context *kctx,
 		struct kbase_ioctl_tlstream_test *test)
 {
-	kbase_tlstream_test(
+	kbase_timeline_test(
+			kctx->kbdev,
 			test->tpw_count,
 			test->msg_delay,
 			test->msg_count,
@@ -1061,7 +1295,7 @@
 static int kbase_api_tlstream_stats(struct kbase_context *kctx,
 		struct kbase_ioctl_tlstream_stats *stats)
 {
-	kbase_tlstream_stats(
+	kbase_timeline_stats(kctx->kbdev->timeline,
 			&stats->bytes_collected,
 			&stats->bytes_generated);
 
@@ -1070,57 +1304,59 @@
 #endif /* MALI_UNIT_TEST */
 
 
-#define KBASE_HANDLE_IOCTL(cmd, function)                          \
-	do {                                                       \
-		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE);          \
-		return function(kctx);                             \
+#define KBASE_HANDLE_IOCTL(cmd, function, arg)    \
+	do {                                          \
+		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_NONE); \
+		return function(arg);                     \
 	} while (0)
 
-#define KBASE_HANDLE_IOCTL_IN(cmd, function, type)                 \
-	do {                                                       \
+#define KBASE_HANDLE_IOCTL_IN(cmd, function, type, arg)    \
+	do {                                                   \
 		type param;                                        \
 		int err;                                           \
 		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_WRITE);         \
 		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
 		err = copy_from_user(&param, uarg, sizeof(param)); \
 		if (err)                                           \
-			return -EFAULT;                            \
-		return function(kctx, &param);                     \
+			return -EFAULT;                                \
+		return function(arg, &param);                      \
 	} while (0)
 
-#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type)                \
-	do {                                                       \
+#define KBASE_HANDLE_IOCTL_OUT(cmd, function, type, arg)   \
+	do {                                                   \
 		type param;                                        \
 		int ret, err;                                      \
 		BUILD_BUG_ON(_IOC_DIR(cmd) != _IOC_READ);          \
 		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));     \
-		ret = function(kctx, &param);                      \
+		memset(&param, 0, sizeof(param));                  \
+		ret = function(arg, &param);                       \
 		err = copy_to_user(uarg, &param, sizeof(param));   \
 		if (err)                                           \
-			return -EFAULT;                            \
+			return -EFAULT;                                \
 		return ret;                                        \
 	} while (0)
 
-#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type)                  \
-	do {                                                           \
+#define KBASE_HANDLE_IOCTL_INOUT(cmd, function, type, arg)     \
+	do {                                                       \
 		type param;                                            \
 		int ret, err;                                          \
 		BUILD_BUG_ON(_IOC_DIR(cmd) != (_IOC_WRITE|_IOC_READ)); \
 		BUILD_BUG_ON(sizeof(param) != _IOC_SIZE(cmd));         \
 		err = copy_from_user(&param, uarg, sizeof(param));     \
 		if (err)                                               \
-			return -EFAULT;                                \
-		ret = function(kctx, &param);                          \
+			return -EFAULT;                                    \
+		ret = function(arg, &param);                           \
 		err = copy_to_user(uarg, &param, sizeof(param));       \
 		if (err)                                               \
-			return -EFAULT;                                \
+			return -EFAULT;                                    \
 		return ret;                                            \
 	} while (0)
 
 static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
-	struct kbase_context *kctx = filp->private_data;
-	struct kbase_device *kbdev = kctx->kbdev;
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *kctx = NULL;
+	struct kbase_device *kbdev = kfile->kbdev;
 	void __user *uarg = (void __user *)arg;
 
 	/* Only these ioctls are available until setup is complete */
@@ -1128,207 +1364,252 @@
 	case KBASE_IOCTL_VERSION_CHECK:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_VERSION_CHECK,
 				kbase_api_handshake,
-				struct kbase_ioctl_version_check);
+				struct kbase_ioctl_version_check,
+				kfile);
 		break;
 
 	case KBASE_IOCTL_SET_FLAGS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SET_FLAGS,
 				kbase_api_set_flags,
-				struct kbase_ioctl_set_flags);
+				struct kbase_ioctl_set_flags,
+				kfile);
 		break;
 	}
 
-	/* Block call until version handshake and setup is complete */
-	if (kctx->api_version == 0 || !atomic_read(&kctx->setup_complete))
-		return -EINVAL;
+	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+	if (unlikely(!kctx))
+		return -EPERM;
 
 	/* Normal ioctls */
 	switch (cmd) {
 	case KBASE_IOCTL_JOB_SUBMIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_JOB_SUBMIT,
 				kbase_api_job_submit,
-				struct kbase_ioctl_job_submit);
+				struct kbase_ioctl_job_submit,
+				kctx);
 		break;
 	case KBASE_IOCTL_GET_GPUPROPS:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
 				kbase_api_get_gpuprops,
-				struct kbase_ioctl_get_gpuprops);
+				struct kbase_ioctl_get_gpuprops,
+				kctx);
 		break;
 	case KBASE_IOCTL_POST_TERM:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
-				kbase_api_post_term);
+				kbase_api_post_term,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_ALLOC:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC,
 				kbase_api_mem_alloc,
-				union kbase_ioctl_mem_alloc);
+				union kbase_ioctl_mem_alloc,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_QUERY:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
 				kbase_api_mem_query,
-				union kbase_ioctl_mem_query);
+				union kbase_ioctl_mem_query,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_FREE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FREE,
 				kbase_api_mem_free,
-				struct kbase_ioctl_mem_free);
+				struct kbase_ioctl_mem_free,
+				kctx);
 		break;
 	case KBASE_IOCTL_DISJOINT_QUERY:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_DISJOINT_QUERY,
 				kbase_api_disjoint_query,
-				struct kbase_ioctl_disjoint_query);
+				struct kbase_ioctl_disjoint_query,
+				kctx);
 		break;
 	case KBASE_IOCTL_GET_DDK_VERSION:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_DDK_VERSION,
 				kbase_api_get_ddk_version,
-				struct kbase_ioctl_get_ddk_version);
+				struct kbase_ioctl_get_ddk_version,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_JIT_INIT_OLD:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT_OLD,
 				kbase_api_mem_jit_init_old,
-				struct kbase_ioctl_mem_jit_init_old);
+				struct kbase_ioctl_mem_jit_init_old,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_JIT_INIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_JIT_INIT,
 				kbase_api_mem_jit_init,
-				struct kbase_ioctl_mem_jit_init);
+				struct kbase_ioctl_mem_jit_init,
+				kctx);
+		break;
+	case KBASE_IOCTL_MEM_EXEC_INIT:
+		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_EXEC_INIT,
+				kbase_api_mem_exec_init,
+				struct kbase_ioctl_mem_exec_init,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_SYNC:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_SYNC,
 				kbase_api_mem_sync,
-				struct kbase_ioctl_mem_sync);
+				struct kbase_ioctl_mem_sync,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_FIND_CPU_OFFSET:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_CPU_OFFSET,
 				kbase_api_mem_find_cpu_offset,
-				union kbase_ioctl_mem_find_cpu_offset);
+				union kbase_ioctl_mem_find_cpu_offset,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_FIND_GPU_START_AND_OFFSET,
 				kbase_api_mem_find_gpu_start_and_offset,
-				union kbase_ioctl_mem_find_gpu_start_and_offset);
+				union kbase_ioctl_mem_find_gpu_start_and_offset,
+				kctx);
 		break;
 	case KBASE_IOCTL_GET_CONTEXT_ID:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_GET_CONTEXT_ID,
 				kbase_api_get_context_id,
-				struct kbase_ioctl_get_context_id);
+				struct kbase_ioctl_get_context_id,
+				kctx);
 		break;
 	case KBASE_IOCTL_TLSTREAM_ACQUIRE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_ACQUIRE,
 				kbase_api_tlstream_acquire,
-				struct kbase_ioctl_tlstream_acquire);
+				struct kbase_ioctl_tlstream_acquire,
+				kctx);
 		break;
 	case KBASE_IOCTL_TLSTREAM_FLUSH:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_TLSTREAM_FLUSH,
-				kbase_api_tlstream_flush);
+				kbase_api_tlstream_flush,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_COMMIT:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_COMMIT,
 				kbase_api_mem_commit,
-				struct kbase_ioctl_mem_commit);
+				struct kbase_ioctl_mem_commit,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_ALIAS:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALIAS,
 				kbase_api_mem_alias,
-				union kbase_ioctl_mem_alias);
+				union kbase_ioctl_mem_alias,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_IMPORT:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_IMPORT,
 				kbase_api_mem_import,
-				union kbase_ioctl_mem_import);
+				union kbase_ioctl_mem_import,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_FLAGS_CHANGE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_FLAGS_CHANGE,
 				kbase_api_mem_flags_change,
-				struct kbase_ioctl_mem_flags_change);
+				struct kbase_ioctl_mem_flags_change,
+				kctx);
 		break;
 	case KBASE_IOCTL_STREAM_CREATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STREAM_CREATE,
 				kbase_api_stream_create,
-				struct kbase_ioctl_stream_create);
+				struct kbase_ioctl_stream_create,
+				kctx);
 		break;
 	case KBASE_IOCTL_FENCE_VALIDATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_FENCE_VALIDATE,
 				kbase_api_fence_validate,
-				struct kbase_ioctl_fence_validate);
-		break;
-	case KBASE_IOCTL_GET_PROFILING_CONTROLS:
-		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_PROFILING_CONTROLS,
-				kbase_api_get_profiling_controls,
-				struct kbase_ioctl_get_profiling_controls);
+				struct kbase_ioctl_fence_validate,
+				kctx);
 		break;
 	case KBASE_IOCTL_MEM_PROFILE_ADD:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_MEM_PROFILE_ADD,
 				kbase_api_mem_profile_add,
-				struct kbase_ioctl_mem_profile_add);
+				struct kbase_ioctl_mem_profile_add,
+				kctx);
 		break;
 	case KBASE_IOCTL_SOFT_EVENT_UPDATE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_SOFT_EVENT_UPDATE,
 				kbase_api_soft_event_update,
-				struct kbase_ioctl_soft_event_update);
+				struct kbase_ioctl_soft_event_update,
+				kctx);
 		break;
 	case KBASE_IOCTL_STICKY_RESOURCE_MAP:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_MAP,
 				kbase_api_sticky_resource_map,
-				struct kbase_ioctl_sticky_resource_map);
+				struct kbase_ioctl_sticky_resource_map,
+				kctx);
 		break;
 	case KBASE_IOCTL_STICKY_RESOURCE_UNMAP:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_STICKY_RESOURCE_UNMAP,
 				kbase_api_sticky_resource_unmap,
-				struct kbase_ioctl_sticky_resource_unmap);
+				struct kbase_ioctl_sticky_resource_unmap,
+				kctx);
 		break;
 
 	/* Instrumentation. */
 	case KBASE_IOCTL_HWCNT_READER_SETUP:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP,
 				kbase_api_hwcnt_reader_setup,
-				struct kbase_ioctl_hwcnt_reader_setup);
+				struct kbase_ioctl_hwcnt_reader_setup,
+				kctx);
 		break;
 	case KBASE_IOCTL_HWCNT_ENABLE:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_ENABLE,
 				kbase_api_hwcnt_enable,
-				struct kbase_ioctl_hwcnt_enable);
+				struct kbase_ioctl_hwcnt_enable,
+				kctx);
 		break;
 	case KBASE_IOCTL_HWCNT_DUMP:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_DUMP,
-				kbase_api_hwcnt_dump);
+				kbase_api_hwcnt_dump,
+				kctx);
 		break;
 	case KBASE_IOCTL_HWCNT_CLEAR:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_HWCNT_CLEAR,
-				kbase_api_hwcnt_clear);
+				kbase_api_hwcnt_clear,
+				kctx);
+		break;
+	case KBASE_IOCTL_GET_CPU_GPU_TIMEINFO:
+		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_GET_CPU_GPU_TIMEINFO,
+				kbase_api_get_cpu_gpu_timeinfo,
+				union kbase_ioctl_get_cpu_gpu_timeinfo,
+				kctx);
 		break;
 #ifdef CONFIG_MALI_NO_MALI
 	case KBASE_IOCTL_HWCNT_SET:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_SET,
 				kbase_api_hwcnt_set,
-				struct kbase_ioctl_hwcnt_values);
+				struct kbase_ioctl_hwcnt_values,
+				kctx);
 		break;
 #endif
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 	case KBASE_IOCTL_CINSTR_GWT_START:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_START,
-				kbase_gpu_gwt_start);
+				kbase_gpu_gwt_start,
+				kctx);
 		break;
 	case KBASE_IOCTL_CINSTR_GWT_STOP:
 		KBASE_HANDLE_IOCTL(KBASE_IOCTL_CINSTR_GWT_STOP,
-				kbase_gpu_gwt_stop);
+				kbase_gpu_gwt_stop,
+				kctx);
 		break;
 	case KBASE_IOCTL_CINSTR_GWT_DUMP:
 		KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CINSTR_GWT_DUMP,
 				kbase_gpu_gwt_dump,
-				union kbase_ioctl_cinstr_gwt_dump);
+				union kbase_ioctl_cinstr_gwt_dump,
+				kctx);
 		break;
 #endif
 #if MALI_UNIT_TEST
 	case KBASE_IOCTL_TLSTREAM_TEST:
 		KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_TLSTREAM_TEST,
 				kbase_api_tlstream_test,
-				struct kbase_ioctl_tlstream_test);
+				struct kbase_ioctl_tlstream_test,
+				kctx);
 		break;
 	case KBASE_IOCTL_TLSTREAM_STATS:
 		KBASE_HANDLE_IOCTL_OUT(KBASE_IOCTL_TLSTREAM_STATS,
 				kbase_api_tlstream_stats,
-				struct kbase_ioctl_tlstream_stats);
+				struct kbase_ioctl_tlstream_stats,
+				kctx);
 		break;
 #endif
 	}
@@ -1340,10 +1621,15 @@
 
 static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
 {
-	struct kbase_context *kctx = filp->private_data;
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
 	struct base_jd_event_v2 uevent;
 	int out_count = 0;
 
+	if (unlikely(!kctx))
+		return -EPERM;
+
 	if (count < sizeof(uevent))
 		return -ENOBUFS;
 
@@ -1379,7 +1665,12 @@
 
 static unsigned int kbase_poll(struct file *filp, poll_table *wait)
 {
-	struct kbase_context *kctx = filp->private_data;
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return POLLERR;
 
 	poll_wait(filp, &kctx->event_queue, wait);
 	if (kbase_event_pending(kctx))
@@ -1397,6 +1688,18 @@
 
 KBASE_EXPORT_TEST_API(kbase_event_wakeup);
 
+static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	return kbase_context_mmap(kctx, vma);
+}
+
 static int kbase_check_flags(int flags)
 {
 	/* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
@@ -1408,6 +1711,20 @@
 	return 0;
 }
 
+static unsigned long kbase_get_unmapped_area(struct file *const filp,
+		const unsigned long addr, const unsigned long len,
+		const unsigned long pgoff, const unsigned long flags)
+{
+	struct kbase_file *const kfile = filp->private_data;
+	struct kbase_context *const kctx =
+		kbase_file_get_kctx_if_setup_complete(kfile);
+
+	if (unlikely(!kctx))
+		return -EPERM;
+
+	return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags);
+}
+
 static const struct file_operations kbase_fops = {
 	.owner = THIS_MODULE,
 	.open = kbase_open,
@@ -1450,7 +1767,7 @@
 
 	current_policy = kbase_pm_get_policy(kbdev);
 
-	policy_count = kbase_pm_list_policies(&policy_list);
+	policy_count = kbase_pm_list_policies(kbdev, &policy_list);
 
 	for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
 		if (policy_list[i] == current_policy)
@@ -1498,7 +1815,7 @@
 	if (!kbdev)
 		return -ENODEV;
 
-	policy_count = kbase_pm_list_policies(&policy_list);
+	policy_count = kbase_pm_list_policies(kbdev, &policy_list);
 
 	for (i = 0; i < policy_count; i++) {
 		if (sysfs_streq(policy_list[i]->name, buf)) {
@@ -1579,7 +1896,10 @@
 {
 	struct kbase_device *kbdev;
 	u64 new_core_mask[3];
-	int items;
+	int items, i;
+	ssize_t err = count;
+	unsigned long flags;
+	u64 shader_present, group0_core_mask;
 
 	kbdev = to_kbase_device(dev);
 
@@ -1590,50 +1910,59 @@
 			&new_core_mask[0], &new_core_mask[1],
 			&new_core_mask[2]);
 
+	if (items != 1 && items != 3) {
+		dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
+			"Use format <core_mask>\n"
+			"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+		err = -EINVAL;
+		goto end;
+	}
+
 	if (items == 1)
 		new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
 
-	if (items == 1 || items == 3) {
-		u64 shader_present =
-				kbdev->gpu_props.props.raw_props.shader_present;
-		u64 group0_core_mask =
-				kbdev->gpu_props.props.coherency_info.group[0].
-				core_mask;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 
-		if ((new_core_mask[0] & shader_present) != new_core_mask[0] ||
-				!(new_core_mask[0] & group0_core_mask) ||
-			(new_core_mask[1] & shader_present) !=
-						new_core_mask[1] ||
-				!(new_core_mask[1] & group0_core_mask) ||
-			(new_core_mask[2] & shader_present) !=
-						new_core_mask[2] ||
-				!(new_core_mask[2] & group0_core_mask)) {
-			dev_err(dev, "power_policy: invalid core specification\n");
-			return -EINVAL;
+	shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+	group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+	for (i = 0; i < 3; ++i) {
+		if ((new_core_mask[i] & shader_present) != new_core_mask[i]) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)",
+					new_core_mask[i], i, shader_present);
+			err = -EINVAL;
+			goto unlock;
+
+		} else if (!(new_core_mask[i] & shader_present & kbdev->pm.backend.ca_cores_enabled)) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n",
+					new_core_mask[i], i,
+					kbdev->gpu_props.props.raw_props.shader_present,
+					kbdev->pm.backend.ca_cores_enabled);
+			err = -EINVAL;
+			goto unlock;
+
+		} else if (!(new_core_mask[i] & group0_core_mask)) {
+			dev_err(dev, "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n",
+					new_core_mask[i], i, group0_core_mask);
+			err = -EINVAL;
+			goto unlock;
 		}
-
-		if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
-				kbdev->pm.debug_core_mask[1] !=
-						new_core_mask[1] ||
-				kbdev->pm.debug_core_mask[2] !=
-						new_core_mask[2]) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-
-			kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
-					new_core_mask[1], new_core_mask[2]);
-
-			spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-		}
-
-		return count;
 	}
 
-	dev_err(kbdev->dev, "Couldn't process set_core_mask write operation.\n"
-		"Use format <core_mask>\n"
-		"or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
-	return -EINVAL;
+	if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
+			kbdev->pm.debug_core_mask[1] !=
+					new_core_mask[1] ||
+			kbdev->pm.debug_core_mask[2] !=
+					new_core_mask[2]) {
+
+		kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
+				new_core_mask[1], new_core_mask[2]);
+	}
+
+unlock:
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+end:
+	return err;
 }
 
 /*
@@ -2035,109 +2364,6 @@
 static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
 		show_js_scheduling_period, set_js_scheduling_period);
 
-#if !MALI_CUSTOMER_RELEASE
-/**
- * set_force_replay - Store callback for the force_replay sysfs file.
- *
- * @dev:	The device with sysfs file is for
- * @attr:	The attributes of the sysfs file
- * @buf:	The value written to the sysfs file
- * @count:	The number of bytes written to the sysfs file
- *
- * Return: @count if the function succeeded. An error code on failure.
- */
-static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct kbase_device *kbdev;
-
-	kbdev = to_kbase_device(dev);
-	if (!kbdev)
-		return -ENODEV;
-
-	if (!strncmp("limit=", buf, MIN(6, count))) {
-		int force_replay_limit;
-		int items = sscanf(buf, "limit=%u", &force_replay_limit);
-
-		if (items == 1) {
-			kbdev->force_replay_random = false;
-			kbdev->force_replay_limit = force_replay_limit;
-			kbdev->force_replay_count = 0;
-
-			return count;
-		}
-	} else if (!strncmp("random_limit", buf, MIN(12, count))) {
-		kbdev->force_replay_random = true;
-		kbdev->force_replay_count = 0;
-
-		return count;
-	} else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
-		kbdev->force_replay_random = false;
-		kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
-		kbdev->force_replay_count = 0;
-
-		return count;
-	} else if (!strncmp("core_req=", buf, MIN(9, count))) {
-		unsigned int core_req;
-		int items = sscanf(buf, "core_req=%x", &core_req);
-
-		if (items == 1) {
-			kbdev->force_replay_core_req = (base_jd_core_req)core_req;
-
-			return count;
-		}
-	}
-	dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
-	return -EINVAL;
-}
-
-/**
- * show_force_replay - Show callback for the force_replay sysfs file.
- *
- * This function is called to get the contents of the force_replay sysfs
- * file. It returns the last set value written to the force_replay sysfs file.
- * If the file didn't get written yet, the values will be 0.
- *
- * @dev:	The device this sysfs file is for
- * @attr:	The attributes of the sysfs file
- * @buf:	The output buffer for the sysfs file contents
- *
- * Return: The number of bytes output to @buf.
- */
-static ssize_t show_force_replay(struct device *dev,
-		struct device_attribute *attr, char * const buf)
-{
-	struct kbase_device *kbdev;
-	ssize_t ret;
-
-	kbdev = to_kbase_device(dev);
-	if (!kbdev)
-		return -ENODEV;
-
-	if (kbdev->force_replay_random)
-		ret = scnprintf(buf, PAGE_SIZE,
-				"limit=0\nrandom_limit\ncore_req=%x\n",
-				kbdev->force_replay_core_req);
-	else
-		ret = scnprintf(buf, PAGE_SIZE,
-				"limit=%u\nnorandom_limit\ncore_req=%x\n",
-				kbdev->force_replay_limit,
-				kbdev->force_replay_core_req);
-
-	if (ret >= PAGE_SIZE) {
-		buf[PAGE_SIZE - 2] = '\n';
-		buf[PAGE_SIZE - 1] = '\0';
-		ret = PAGE_SIZE - 1;
-	}
-
-	return ret;
-}
-
-/*
- * The sysfs file force_replay.
- */
-static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
-		set_force_replay);
-#endif /* !MALI_CUSTOMER_RELEASE */
 
 #ifdef CONFIG_MALI_DEBUG
 static ssize_t set_js_softstop_always(struct device *dev,
@@ -2321,14 +2547,6 @@
 		unsigned id;
 		char *name;
 	} gpu_product_id_names[] = {
-		{ .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
-		{ .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
-		{ .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
-		{ .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
-		{ .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
-		{ .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
-		{ .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
-		{ .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
 		{ .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-G71" },
 		{ .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
@@ -2341,13 +2559,24 @@
 		  .name = "Mali-G31" },
 		{ .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
 		  .name = "Mali-G52" },
+		{ .id = GPU_ID2_PRODUCT_TTRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-G77" },
+		{ .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TBEX" },
+		{ .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-LBEX" },
+		{ .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TNAX" },
+		{ .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-TODX" },
+		{ .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+		  .name = "Mali-LODX" },
 	};
 	const char *product_name = "(Unknown Mali GPU)";
 	struct kbase_device *kbdev;
 	u32 gpu_id;
 	unsigned product_id, product_id_mask;
 	unsigned i;
-	bool is_new_format;
 
 	kbdev = to_kbase_device(dev);
 	if (!kbdev)
@@ -2355,18 +2584,12 @@
 
 	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
 	product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
-	is_new_format = GPU_ID_IS_NEW_FORMAT(product_id);
-	product_id_mask =
-		(is_new_format ?
-			GPU_ID2_PRODUCT_MODEL :
-			GPU_ID_VERSION_PRODUCT_ID) >>
-		GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+	product_id_mask = GPU_ID2_PRODUCT_MODEL >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
 	for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
 		const struct gpu_product_id_name *p = &gpu_product_id_names[i];
 
-		if ((GPU_ID_IS_NEW_FORMAT(p->id) == is_new_format) &&
-		    (p->id & product_id_mask) ==
+		if ((p->id & product_id_mask) ==
 		    (product_id & product_id_mask)) {
 			product_name = p->name;
 			break;
@@ -2467,9 +2690,11 @@
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct kbase_device *kbdev;
+	struct kbasep_pm_tick_timer_state *stt;
 	int items;
-	s64 gpu_poweroff_time;
-	int poweroff_shader_ticks, poweroff_gpu_ticks;
+	u64 gpu_poweroff_time;
+	unsigned int poweroff_shader_ticks, poweroff_gpu_ticks;
+	unsigned long flags;
 
 	kbdev = to_kbase_device(dev);
 	if (!kbdev)
@@ -2484,9 +2709,16 @@
 		return -EINVAL;
 	}
 
-	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
-	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
-	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	stt = &kbdev->pm.backend.shader_tick_timer;
+	stt->configured_interval = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	stt->configured_ticks = poweroff_shader_ticks;
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (poweroff_gpu_ticks != 0)
+		dev_warn(kbdev->dev, "Separate GPU poweroff delay no longer supported.\n");
 
 	return count;
 }
@@ -2506,16 +2738,22 @@
 		struct device_attribute *attr, char * const buf)
 {
 	struct kbase_device *kbdev;
+	struct kbasep_pm_tick_timer_state *stt;
 	ssize_t ret;
+	unsigned long flags;
 
 	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
-			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
-			kbdev->pm.poweroff_shader_ticks,
-			kbdev->pm.poweroff_gpu_ticks);
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	stt = &kbdev->pm.backend.shader_tick_timer;
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u 0\n",
+			ktime_to_ns(stt->configured_interval),
+			stt->configured_ticks);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	return ret;
 }
@@ -2588,41 +2826,33 @@
 		set_reset_timeout);
 
 
-
 static ssize_t show_mem_pool_size(struct device *dev,
 		struct device_attribute *attr, char * const buf)
 {
-	struct kbase_device *kbdev;
-	ssize_t ret;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
-			kbase_mem_pool_size(&kbdev->mem_pool));
-
-	return ret;
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_size);
 }
 
 static ssize_t set_mem_pool_size(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct kbase_device *kbdev;
-	size_t new_size;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 	int err;
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	err = kstrtoul(buf, 0, (unsigned long *)&new_size);
-	if (err)
-		return err;
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_trim);
 
-	kbase_mem_pool_trim(&kbdev->mem_pool, new_size);
-
-	return count;
+	return err ? err : count;
 }
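
With the switch to the kbase_debugfs_helper routines, this attribute no longer carries a single value: each read or write of mem_pool_size now takes one value per memory group, and the helper invokes kbase_mem_pool_debugfs_trim() once per parsed element. A small hypothetical illustration of the equivalent in-kernel call (the literal sizes are example values only):

/* Hypothetical illustration, not part of the patch: the store callback above
 * is equivalent to handing the user string straight to the helper, which
 * invokes the trim callback once per parsed value. For a write of
 * "1024 1024 0 0", groups 0..3 of the small-page pools would be trimmed to
 * 1024, 1024, 0 and 0 pages respectively. */
static int example_trim_small_pools(struct kbase_device *kbdev)
{
	return kbase_debugfs_helper_set_attr_from_string("1024 1024 0 0",
		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
		kbase_mem_pool_debugfs_trim);
}
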
 
 static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
@@ -2631,37 +2861,30 @@
 static ssize_t show_mem_pool_max_size(struct device *dev,
 		struct device_attribute *attr, char * const buf)
 {
-	struct kbase_device *kbdev;
-	ssize_t ret;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	ret = scnprintf(buf, PAGE_SIZE, "%zu\n",
-			kbase_mem_pool_max_size(&kbdev->mem_pool));
-
-	return ret;
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_max_size);
 }
 
 static ssize_t set_mem_pool_max_size(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct kbase_device *kbdev;
-	size_t new_max_size;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 	int err;
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	err = kstrtoul(buf, 0, (unsigned long *)&new_max_size);
-	if (err)
-		return -EINVAL;
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.small, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_set_max_size);
 
-	kbase_mem_pool_set_max_size(&kbdev->mem_pool, new_max_size);
-
-	return count;
+	return err ? err : count;
 }
 
 static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
@@ -2680,13 +2903,14 @@
 static ssize_t show_lp_mem_pool_size(struct device *dev,
 		struct device_attribute *attr, char * const buf)
 {
-	struct kbase_device *kbdev;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_size(&kbdev->lp_mem_pool));
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_size);
 }
 
 /**
@@ -2704,21 +2928,17 @@
 static ssize_t set_lp_mem_pool_size(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct kbase_device *kbdev;
-	unsigned long new_size;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 	int err;
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	err = kstrtoul(buf, 0, &new_size);
-	if (err)
-		return err;
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_trim);
 
-	kbase_mem_pool_trim(&kbdev->lp_mem_pool, new_size);
-
-	return count;
+	return err ? err : count;
 }
 
 static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
@@ -2737,13 +2957,14 @@
 static ssize_t show_lp_mem_pool_max_size(struct device *dev,
 		struct device_attribute *attr, char * const buf)
 {
-	struct kbase_device *kbdev;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	return scnprintf(buf, PAGE_SIZE, "%zu\n", kbase_mem_pool_max_size(&kbdev->lp_mem_pool));
+	return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_max_size);
 }
 
 /**
@@ -2760,21 +2981,17 @@
 static ssize_t set_lp_mem_pool_max_size(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct kbase_device *kbdev;
-	unsigned long new_max_size;
+	struct kbase_device *const kbdev = to_kbase_device(dev);
 	int err;
 
-	kbdev = to_kbase_device(dev);
 	if (!kbdev)
 		return -ENODEV;
 
-	err = kstrtoul(buf, 0, &new_max_size);
-	if (err)
-		return -EINVAL;
+	err = kbase_debugfs_helper_set_attr_from_string(buf,
+		kbdev->mem_pools.large, MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_set_max_size);
 
-	kbase_mem_pool_set_max_size(&kbdev->lp_mem_pool, new_max_size);
-
-	return count;
+	return err ? err : count;
 }
 
 static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
@@ -2820,7 +3037,7 @@
 static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
-	struct kbasep_kctx_list_element *element;
+	struct kbase_context *kctx;
 	u32 new_js_ctx_scheduling_mode;
 	struct kbase_device *kbdev;
 	unsigned long flags;
@@ -2848,8 +3065,8 @@
 	kbdev->js_ctx_scheduling_mode = new_js_ctx_scheduling_mode;
 
 	/* Adjust priority of all the contexts as per the new mode */
-	list_for_each_entry(element, &kbdev->kctx_list, link)
-		kbase_js_update_ctx_priority(element->kctx);
+	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link)
+		kbase_js_update_ctx_priority(kctx);
 
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 	mutex_unlock(&kbdev->kctx_list_lock);
@@ -2862,6 +3079,8 @@
 static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
 		show_js_ctx_scheduling_mode,
 		set_js_ctx_scheduling_mode);
+
+#ifdef MALI_KBASE_BUILD
 #ifdef CONFIG_DEBUG_FS
 
 /* Number of entries in serialize_jobs_settings[] */
@@ -2978,6 +3197,7 @@
 }
 
 static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
+	.owner = THIS_MODULE,
 	.open = kbasep_serialize_jobs_debugfs_open,
 	.read = seq_read,
 	.write = kbasep_serialize_jobs_debugfs_write,
@@ -2986,6 +3206,46 @@
 };
 
 #endif /* CONFIG_DEBUG_FS */
+#endif /* MALI_KBASE_BUILD */
+
+static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data)
+{
+	struct kbase_device *kbdev = container_of(data, struct kbase_device,
+		protected_mode_hwcnt_disable_work);
+	unsigned long flags;
+
+	bool do_disable;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !kbdev->protected_mode_hwcnt_desired &&
+		!kbdev->protected_mode_hwcnt_disabled;
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	if (!do_disable)
+		return;
+
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	do_disable = !kbdev->protected_mode_hwcnt_desired &&
+		!kbdev->protected_mode_hwcnt_disabled;
+
+	if (do_disable) {
+		/* Protected mode state did not change while we were doing the
+		 * disable, so commit the work we just performed and continue
+		 * the state machine.
+		 */
+		kbdev->protected_mode_hwcnt_disabled = true;
+		kbase_backend_slot_update(kbdev);
+	} else {
+		/* Protected mode state was updated while we were doing the
+		 * disable, so we need to undo the disable we just performed.
+		 */
+		kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	}
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
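
The worker above follows a check / act / re-check shape: the decision to disable is sampled under hwaccess_lock, the potentially slow counter disable runs outside the lock, and the decision is re-validated before being committed or undone. A distilled sketch of that shape, with hypothetical example_* names, for illustration only:

#include <linux/spinlock.h>
#include <linux/types.h>

struct example_state {
	spinlock_t lock;
	bool disable_wanted;
	bool disabled;
};

/* Stand-ins for kbase_hwcnt_context_disable()/_enable(). */
static void example_slow_disable(struct example_state *st) { }
static void example_undo_disable(struct example_state *st) { }

static void example_deferred_disable(struct example_state *st)
{
	unsigned long flags;
	bool do_disable;

	spin_lock_irqsave(&st->lock, flags);
	do_disable = st->disable_wanted && !st->disabled;
	spin_unlock_irqrestore(&st->lock, flags);

	if (!do_disable)
		return;

	example_slow_disable(st);	/* too slow to hold the lock across */

	spin_lock_irqsave(&st->lock, flags);
	if (st->disable_wanted && !st->disabled)
		st->disabled = true;	/* state unchanged: commit */
	else
		example_undo_disable(st);	/* state changed: roll back */
	spin_unlock_irqrestore(&st->lock, flags);
}
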
 
 static int kbasep_protected_mode_init(struct kbase_device *kbdev)
 {
@@ -3004,6 +3264,10 @@
 		kbdev->protected_dev->data = kbdev;
 		kbdev->protected_ops = &kbase_native_protected_ops;
 		kbdev->protected_mode_support = true;
+		INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work,
+			kbasep_protected_mode_hwcnt_disable_worker);
+		kbdev->protected_mode_hwcnt_desired = true;
+		kbdev->protected_mode_hwcnt_disabled = false;
 		return 0;
 	}
 
@@ -3053,8 +3317,10 @@
 
 static void kbasep_protected_mode_term(struct kbase_device *kbdev)
 {
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE))
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_MODE)) {
+		cancel_work_sync(&kbdev->protected_mode_hwcnt_disable_work);
 		kfree(kbdev->protected_dev);
+	}
 }
 
 #ifdef CONFIG_MALI_NO_MALI
@@ -3138,150 +3404,175 @@
 
 static int power_control_init(struct platform_device *pdev)
 {
+#if KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE || !defined(CONFIG_OF)
+	/* Power control initialization requires at least the capability to get
+	 * regulators and clocks from the device tree, as well as parsing
+	 * arrays of unsigned integer values.
+	 *
+	 * The whole initialization process shall simply be skipped if the
+	 * minimum capability is not available.
+	 */
+	return 0;
+#else
 	struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
 	int err = 0;
-	const char *reg_names[KBASE_MAX_REGULATORS];
+	unsigned int i;
 #if defined(CONFIG_REGULATOR)
-	int i;
-#endif
+	static const char *regulator_names[] = {
+		"mali", "shadercores"
+	};
+	BUILD_BUG_ON(ARRAY_SIZE(regulator_names) < BASE_MAX_NR_CLOCKS_REGULATORS);
+#endif /* CONFIG_REGULATOR */
 
 	if (!kbdev)
 		return -ENODEV;
 
-	kbdev->regulator_num = of_property_count_strings(kbdev->dev->of_node,
-		"supply-names");
-
-	if (kbdev->regulator_num > KBASE_MAX_REGULATORS) {
-		dev_err(&pdev->dev, "Too many regulators: %d > %d\n",
-			kbdev->regulator_num, KBASE_MAX_REGULATORS);
-		return -EINVAL;
-	} else if (kbdev->regulator_num == -EINVAL) {
-		/* The 'supply-names' is optional; if not there assume "mali" */
-		kbdev->regulator_num = 1;
-		reg_names[0] = "mali";
-	} else if (kbdev->regulator_num < 0) {
-		err = kbdev->regulator_num;
-	} else {
-		err = of_property_read_string_array(kbdev->dev->of_node,
-						    "supply-names", reg_names,
-						    kbdev->regulator_num);
+#if defined(CONFIG_REGULATOR)
+	/* Since the error code EPROBE_DEFER causes the entire probing
+	 * procedure to be restarted from scratch at a later time,
+	 * all regulators will be released before returning.
+	 *
+	 * Any other error is ignored and the driver will continue
+	 * operating with a partial initialization of regulators.
+	 */
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		kbdev->regulators[i] = regulator_get_optional(kbdev->dev,
+			regulator_names[i]);
+		if (IS_ERR_OR_NULL(kbdev->regulators[i])) {
+			err = PTR_ERR(kbdev->regulators[i]);
+			kbdev->regulators[i] = NULL;
+			break;
+		}
 	}
-
-	if (err < 0) {
-		dev_err(&pdev->dev, "Error reading supply-names: %d\n", err);
+	if (err == -EPROBE_DEFER) {
+		while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS))
+			regulator_put(kbdev->regulators[--i]);
 		return err;
 	}
 
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
-			&& defined(CONFIG_REGULATOR)
-	for (i = 0; i < kbdev->regulator_num; i++) {
-		kbdev->regulator[i] = regulator_get(kbdev->dev, reg_names[i]);
-		if (IS_ERR(kbdev->regulator[i])) {
-			err = PTR_ERR(kbdev->regulator[i]);
-			kbdev->regulator[i] = NULL;
-			if (err != -EPROBE_DEFER)
-				dev_err(&pdev->dev, "Failed to get regulator\n");
-			goto fail;
-		}
-	}
-#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
-
-	kbdev->clock = of_clk_get(kbdev->dev->of_node, 0);
-	if (IS_ERR_OR_NULL(kbdev->clock)) {
-		err = PTR_ERR(kbdev->clock);
-		kbdev->clock = NULL;
-		if (err == -EPROBE_DEFER) {
-			dev_err(&pdev->dev, "Failed to get clock\n");
-			goto fail;
-		}
-		dev_info(kbdev->dev, "Continuing without Mali clock control\n");
-		/* Allow probe to continue without clock. */
-	}
-
-#if defined(CONFIG_OF) && defined(CONFIG_PM_OPP)
-	/* Register the OPPs if they are available in device tree */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) \
-	|| defined(LSK_OPPV2_BACKPORT)
-#if defined(CONFIG_REGULATOR)
-	kbdev->dev_opp_table = dev_pm_opp_set_regulators(kbdev->dev, reg_names,
-							 kbdev->regulator_num);
-	if (IS_ERR(kbdev->dev_opp_table)) {
-		err = PTR_ERR(kbdev->dev_opp_table);
-		kbdev->dev_opp_table = NULL;
-		dev_err(kbdev->dev, "Failed to init devfreq opp table: %d\n",
-			err);
-	}
-#endif /* CONFIG_REGULATOR */
-	err = dev_pm_opp_of_add_table(kbdev->dev);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
-	err = of_init_opp_table(kbdev->dev);
-#else
-	err = 0;
-#endif /* LINUX_VERSION_CODE */
-	if (err)
-		dev_dbg(kbdev->dev, "OPP table not found\n");
-#endif /* CONFIG_OF && CONFIG_PM_OPP */
-
-	return 0;
-
-fail:
-
-if (kbdev->clock != NULL) {
-	clk_put(kbdev->clock);
-	kbdev->clock = NULL;
-}
-
-#ifdef CONFIG_REGULATOR
-	for (i = 0; i < kbdev->regulator_num; i++) {
-		if (NULL != kbdev->regulator[i]) {
-			regulator_put(kbdev->regulator[i]);
-			kbdev->regulator[i] = NULL;
-		}
-	}
+	kbdev->nr_regulators = i;
+	dev_dbg(&pdev->dev, "Regulators probed: %u\n", kbdev->nr_regulators);
 #endif
 
+	/* Having more clocks than regulators is acceptable; the opposite
+	 * must not happen.
+	 *
+	 * Since the error code EPROBE_DEFER causes the entire probing
+	 * procedure to be restarted from scratch at a later time,
+	 * all clocks and regulators will be released before returning.
+	 *
+	 * Any other error is ignored and the driver will continue
+	 * operating with a partial initialization of clocks.
+	 */
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		kbdev->clocks[i] = of_clk_get(kbdev->dev->of_node, i);
+		if (IS_ERR_OR_NULL(kbdev->clocks[i])) {
+			err = PTR_ERR(kbdev->clocks[i]);
+			kbdev->clocks[i] = NULL;
+			break;
+		}
+
+		err = clk_prepare_enable(kbdev->clocks[i]);
+		if (err) {
+			dev_err(kbdev->dev,
+				"Failed to prepare and enable clock (%d)\n",
+				err);
+			clk_put(kbdev->clocks[i]);
+			break;
+		}
+	}
+	if (err == -EPROBE_DEFER) {
+		while ((i > 0) && (i < BASE_MAX_NR_CLOCKS_REGULATORS)) {
+			clk_disable_unprepare(kbdev->clocks[--i]);
+			clk_put(kbdev->clocks[i]);
+		}
+		goto clocks_probe_defer;
+	}
+
+	kbdev->nr_clocks = i;
+	dev_dbg(&pdev->dev, "Clocks probed: %u\n", kbdev->nr_clocks);
+
+	/* Any error in parsing the OPP table from the device tree
+	 * shall be ignored. The fact that the table may be absent or wrong
+	 * in the platform's device tree shouldn't prevent the driver
+	 * from completing its initialization.
+	 */
+#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \
+	!defined(LSK_OPPV2_BACKPORT))
+	err = of_init_opp_table(kbdev->dev);
+	CSTD_UNUSED(err);
+#else
+
+#if defined(CONFIG_PM_OPP)
+#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
+	defined(CONFIG_REGULATOR))
+	if (kbdev->nr_regulators > 0) {
+		kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev,
+			regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS);
+	}
+#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+	err = dev_pm_opp_of_add_table(kbdev->dev);
+	CSTD_UNUSED(err);
+#endif /* CONFIG_PM_OPP */
+
+#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */
+	return 0;
+
+clocks_probe_defer:
+#if defined(CONFIG_REGULATOR)
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++)
+		regulator_put(kbdev->regulators[i]);
+#endif
 	return err;
+#endif /* KERNEL_VERSION(3, 18, 0) > LINUX_VERSION_CODE */
 }
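
The regulator and clock loops above share one policy: a partial set of resources is tolerated, but -EPROBE_DEFER must unwind everything acquired so far so that the deferred probe attempt starts from a clean slate. A condensed sketch of that policy, using hypothetical example_* acquire/release helpers standing in for regulator_get_optional()/regulator_put() or of_clk_get()/clk_put():

#include <linux/device.h>
#include <linux/err.h>
#include <linux/errno.h>

#define EXAMPLE_MAX_RES 2

struct example_probe_state {
	struct device *dev;
	void *res[EXAMPLE_MAX_RES];
	unsigned int nr_res;
};

/* Hypothetical get/put helpers; illustration only. */
extern void *example_acquire(struct device *dev, unsigned int idx);
extern void example_release(void *res);

static int example_get_optional_resources(struct example_probe_state *st)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < EXAMPLE_MAX_RES; i++) {
		st->res[i] = example_acquire(st->dev, i);
		if (IS_ERR_OR_NULL(st->res[i])) {
			err = PTR_ERR(st->res[i]);
			st->res[i] = NULL;
			break;
		}
	}

	if (err == -EPROBE_DEFER) {
		/* Release everything so the deferred probe starts clean. */
		while (i > 0)
			example_release(st->res[--i]);
		return err;
	}

	/* Any other error: carry on with the resources we did get. */
	st->nr_res = i;
	return 0;
}
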
 
 static void power_control_term(struct kbase_device *kbdev)
 {
-	int i;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)) || \
-		defined(LSK_OPPV2_BACKPORT)
-	dev_pm_opp_of_remove_table(kbdev->dev);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
+	unsigned int i;
+
+#if (KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE && \
+	!defined(LSK_OPPV2_BACKPORT))
+#if KERNEL_VERSION(3, 19, 0) <= LINUX_VERSION_CODE
 	of_free_opp_table(kbdev->dev);
 #endif
+#else
 
-	if (kbdev->clock) {
-		clk_put(kbdev->clock);
-		kbdev->clock = NULL;
+#if defined(CONFIG_PM_OPP)
+	dev_pm_opp_of_remove_table(kbdev->dev);
+#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \
+	defined(CONFIG_REGULATOR))
+	if (!IS_ERR_OR_NULL(kbdev->opp_table))
+		dev_pm_opp_put_regulators(kbdev->opp_table);
+#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */
+#endif /* CONFIG_PM_OPP */
+
+#endif /* KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE */
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (kbdev->clocks[i]) {
+			if (__clk_is_enabled(kbdev->clocks[i]))
+				clk_disable_unprepare(kbdev->clocks[i]);
+			clk_put(kbdev->clocks[i]);
+			kbdev->clocks[i] = NULL;
+		} else
+			break;
 	}
 
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
 			&& defined(CONFIG_REGULATOR)
-	for (i = 0; i < kbdev->regulator_num; i++) {
-		if (kbdev->regulator[i]) {
-			regulator_put(kbdev->regulator[i]);
-			kbdev->regulator[i] = NULL;
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (kbdev->regulators[i]) {
+			regulator_put(kbdev->regulators[i]);
+			kbdev->regulators[i] = NULL;
 		}
 	}
 #endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
-
-#ifdef CONFIG_REGULATOR
-	if (kbdev->dev_opp_table != NULL) {
-		dev_pm_opp_put_regulators(kbdev->dev_opp_table);
-		kbdev->dev_opp_table = NULL;
-	}
-#endif
 }
 
 #ifdef MALI_KBASE_BUILD
 #ifdef CONFIG_DEBUG_FS
 
-#if KBASE_GPU_RESET_EN
-#include <mali_kbase_hwaccess_jm.h>
-
 static void trigger_quirks_reload(struct kbase_device *kbdev)
 {
 	kbase_pm_context_active(kbdev);
@@ -3315,7 +3606,6 @@
 MAKE_QUIRK_ACCESSORS(mmu);
 MAKE_QUIRK_ACCESSORS(jm);
 
-#endif /* KBASE_GPU_RESET_EN */
 
 /**
  * debugfs_protected_debug_mode_read - "protected_debug_mode" debugfs read
@@ -3354,11 +3644,51 @@
  * Contains the file operations for the "protected_debug_mode" debugfs file
  */
 static const struct file_operations fops_protected_debug_mode = {
+	.owner = THIS_MODULE,
 	.open = simple_open,
 	.read = debugfs_protected_debug_mode_read,
 	.llseek = default_llseek,
 };
 
+static int kbase_device_debugfs_mem_pool_max_size_show(struct seq_file *sfile,
+	void *data)
+{
+	CSTD_UNUSED(data);
+	return kbase_debugfs_helper_seq_read(sfile,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_config_debugfs_max_size);
+}
+
+static ssize_t kbase_device_debugfs_mem_pool_max_size_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	int err = 0;
+
+	CSTD_UNUSED(ppos);
+	err = kbase_debugfs_helper_seq_write(file, ubuf, count,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_config_debugfs_set_max_size);
+
+	return err ? err : count;
+}
+
+static int kbase_device_debugfs_mem_pool_max_size_open(struct inode *in,
+	struct file *file)
+{
+	return single_open(file, kbase_device_debugfs_mem_pool_max_size_show,
+		in->i_private);
+}
+
+static const struct file_operations
+	kbase_device_debugfs_mem_pool_max_size_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_device_debugfs_mem_pool_max_size_open,
+	.read = seq_read,
+	.write = kbase_device_debugfs_mem_pool_max_size_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
 static int kbase_device_debugfs_init(struct kbase_device *kbdev)
 {
 	struct dentry *debugfs_ctx_defaults_directory;
@@ -3396,7 +3726,6 @@
 	kbase_debug_job_fault_debugfs_init(kbdev);
 	kbasep_gpu_memory_debugfs_init(kbdev);
 	kbase_as_fault_debugfs_init(kbdev);
-#if KBASE_GPU_RESET_EN
 	/* fops_* variables created by invocations of macro
 	 * MAKE_QUIRK_ACCESSORS() above. */
 	debugfs_create_file("quirks_sc", 0644,
@@ -3411,15 +3740,20 @@
 	debugfs_create_file("quirks_jm", 0644,
 			kbdev->mali_debugfs_directory, kbdev,
 			&fops_jm_quirks);
-#endif /* KBASE_GPU_RESET_EN */
 
 	debugfs_create_bool("infinite_cache", 0644,
 			debugfs_ctx_defaults_directory,
 			&kbdev->infinite_cache_active_default);
 
-	debugfs_create_size_t("mem_pool_max_size", 0644,
+	debugfs_create_file("mem_pool_max_size", 0644,
 			debugfs_ctx_defaults_directory,
-			&kbdev->mem_pool_max_size_default);
+			&kbdev->mem_pool_defaults.small,
+			&kbase_device_debugfs_mem_pool_max_size_fops);
+
+	debugfs_create_file("lp_mem_pool_max_size", 0644,
+			debugfs_ctx_defaults_directory,
+			&kbdev->mem_pool_defaults.large,
+			&kbase_device_debugfs_mem_pool_max_size_fops);
 
 	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
 		debugfs_create_file("protected_debug_mode", S_IRUGO,
@@ -3438,11 +3772,10 @@
 #endif /* CONFIG_DEVFREQ_THERMAL */
 #endif /* CONFIG_MALI_DEVFREQ */
 
-#ifdef CONFIG_DEBUG_FS
 	debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
 			kbdev->mali_debugfs_directory, kbdev,
 			&kbasep_serialize_jobs_debugfs_fops);
-#endif /* CONFIG_DEBUG_FS */
+
 
 	return 0;
 
@@ -3479,9 +3812,8 @@
 	 * (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
 	 * documented for tMIx so force correct value here.
 	 */
-	if (GPU_ID_IS_NEW_FORMAT(prod_id) &&
-		   (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
-				   GPU_ID2_PRODUCT_TMIX))
+	if (GPU_ID2_MODEL_MATCH_VALUE(prod_id) ==
+			GPU_ID2_PRODUCT_TMIX)
 		if (supported_coherency_bitmap ==
 				COHERENCY_FEATURE_BIT(COHERENCY_ACE))
 			supported_coherency_bitmap |=
@@ -3521,7 +3853,7 @@
 		kbdev->system_coherency;
 }
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#ifdef CONFIG_MALI_BUSLOG
 
 /* Callback used by the kbase bus logger client, to initiate a GPU reset
  * when the bus log is restarted.  GPU reset is used as reference point
@@ -3542,9 +3874,6 @@
 	&dev_attr_debug_command.attr,
 	&dev_attr_js_softstop_always.attr,
 #endif
-#if !MALI_CUSTOMER_RELEASE
-	&dev_attr_force_replay.attr,
-#endif
 	&dev_attr_js_timeouts.attr,
 	&dev_attr_soft_job_timeout.attr,
 	&dev_attr_gpuinfo.attr,
@@ -3576,14 +3905,13 @@
 
 	kfree(kbdev->gpu_props.prop_buffer);
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#ifdef CONFIG_MALI_BUSLOG
 	if (kbdev->inited_subsys & inited_buslogger) {
 		bl_core_client_unregister(kbdev->buslogger);
 		kbdev->inited_subsys &= ~inited_buslogger;
 	}
 #endif
 
-
 	if (kbdev->inited_subsys & inited_dev_list) {
 		dev_list = kbase_dev_list_get();
 		list_del(&kbdev->entry);
@@ -3618,26 +3946,34 @@
 		kbdev->inited_subsys &= ~inited_job_fault;
 	}
 
-#ifdef CONFIG_MALI_DEVFREQ
-	if (kbdev->inited_subsys & inited_devfreq) {
-		kbase_devfreq_term(kbdev);
-		kbdev->inited_subsys &= ~inited_devfreq;
-	}
-#endif
-
-
-	if (kbdev->inited_subsys & inited_vinstr) {
-		kbase_vinstr_term(kbdev->vinstr_ctx);
-		kbdev->inited_subsys &= ~inited_vinstr;
-	}
 
 	if (kbdev->inited_subsys & inited_backend_late) {
 		kbase_backend_late_term(kbdev);
 		kbdev->inited_subsys &= ~inited_backend_late;
 	}
 
+	if (kbdev->inited_subsys & inited_vinstr) {
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+		kbdev->inited_subsys &= ~inited_vinstr;
+	}
+
+	if (kbdev->inited_subsys & inited_hwcnt_gpu_virt) {
+		kbase_hwcnt_virtualizer_term(kbdev->hwcnt_gpu_virt);
+		kbdev->inited_subsys &= ~inited_hwcnt_gpu_virt;
+	}
+
+	if (kbdev->inited_subsys & inited_hwcnt_gpu_ctx) {
+		kbase_hwcnt_context_term(kbdev->hwcnt_gpu_ctx);
+		kbdev->inited_subsys &= ~inited_hwcnt_gpu_ctx;
+	}
+
+	if (kbdev->inited_subsys & inited_hwcnt_gpu_iface) {
+		kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface);
+		kbdev->inited_subsys &= ~inited_hwcnt_gpu_iface;
+	}
+
 	if (kbdev->inited_subsys & inited_tlstream) {
-		kbase_tlstream_term();
+		kbase_timeline_term(kbdev->timeline);
 		kbdev->inited_subsys &= ~inited_tlstream;
 	}
 
@@ -3709,6 +4045,29 @@
 	return 0;
 }
 
+void kbase_backend_devfreq_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_term(kbdev);
+		kbdev->inited_subsys &= ~inited_devfreq;
+	}
+#endif
+}
+
+int kbase_backend_devfreq_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_DEVFREQ
+	/* Devfreq uses hardware counters, so must be initialized after it. */
+	int err = kbase_devfreq_init(kbdev);
+
+	if (!err)
+		kbdev->inited_subsys |= inited_devfreq;
+	else
+		dev_err(kbdev->dev, "Continuing without devfreq\n");
+#endif /* CONFIG_MALI_DEVFREQ */
+	return 0;
+}
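
These hooks are referred to again in the probe path below: kbase_backend_late_init() is now assumed to call kbase_backend_devfreq_init() itself, after the hardware counter infrastructure is up and before it drops its initial power-management reference. A minimal sketch of that assumed ordering (the real backend code lives under backend/gpu and is not part of this hunk):

/* Sketch only, under the ordering assumption described above. */
static int example_backend_late_init_tail(struct kbase_device *kbdev)
{
	int err = kbase_backend_devfreq_init(kbdev);

	if (err)
		return err;

	kbase_pm_context_idle(kbdev);	/* first idle, after devfreq exists */
	return 0;
}
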
 
 /* Number of register accesses for the buffer that we allocate during
  * initialization time. The buffer size can be changed later via debugfs. */
@@ -3777,9 +4136,7 @@
 
 	err = kbase_backend_early_init(kbdev);
 	if (err) {
-		if (err != -EPROBE_DEFER)
-			dev_err(kbdev->dev,
-				"Early backend initialization failed\n");
+		dev_err(kbdev->dev, "Early backend initialization failed\n");
 		kbase_platform_device_remove(pdev);
 		return err;
 	}
@@ -3787,6 +4144,7 @@
 
 	scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
 			kbase_dev_nr);
+	kbdev->id = kbase_dev_nr;
 
 	kbase_disjoint_init(kbdev);
 
@@ -3852,7 +4210,8 @@
 	}
 	kbdev->inited_subsys |= inited_js;
 
-	err = kbase_tlstream_init();
+	atomic_set(&kbdev->timeline_is_enabled, 0);
+	err = kbase_timeline_init(&kbdev->timeline, &kbdev->timeline_is_enabled);
 	if (err) {
 		dev_err(kbdev->dev, "Timeline stream initialization failed\n");
 		kbase_platform_device_remove(pdev);
@@ -3860,6 +4219,48 @@
 	}
 	kbdev->inited_subsys |= inited_tlstream;
 
+	err = kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface);
+	if (err) {
+		dev_err(kbdev->dev, "GPU hwcnt backend creation failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_hwcnt_gpu_iface;
+
+	err = kbase_hwcnt_context_init(&kbdev->hwcnt_gpu_iface,
+		&kbdev->hwcnt_gpu_ctx);
+	if (err) {
+		dev_err(kbdev->dev,
+			"GPU hwcnt context initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_hwcnt_gpu_ctx;
+
+	err = kbase_hwcnt_virtualizer_init(
+		kbdev->hwcnt_gpu_ctx,
+		KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS,
+		&kbdev->hwcnt_gpu_virt);
+	if (err) {
+		dev_err(kbdev->dev,
+			"GPU hwcnt virtualizer initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return err;
+	}
+	kbdev->inited_subsys |= inited_hwcnt_gpu_virt;
+
+	err = kbase_vinstr_init(kbdev->hwcnt_gpu_virt, &kbdev->vinstr_ctx);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Virtual instrumentation initialization failed\n");
+		kbase_platform_device_remove(pdev);
+		return -EINVAL;
+	}
+	kbdev->inited_subsys |= inited_vinstr;
+
+	/* Devfreq initialization is now embedded inside
+	 * kbase_backend_late_init(), which calls kbase_backend_devfreq_init()
+	 * before the first trigger of pm_context_idle(). */
 	err = kbase_backend_late_init(kbdev);
 	if (err) {
 		dev_err(kbdev->dev, "Late backend initialization failed\n");
@@ -3868,28 +4269,6 @@
 	}
 	kbdev->inited_subsys |= inited_backend_late;
 
-	/* Initialize the kctx list. This is used by vinstr. */
-	mutex_init(&kbdev->kctx_list_lock);
-	INIT_LIST_HEAD(&kbdev->kctx_list);
-
-	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
-	if (!kbdev->vinstr_ctx) {
-		dev_err(kbdev->dev,
-			"Virtual instrumentation initialization failed\n");
-		kbase_platform_device_remove(pdev);
-		return -EINVAL;
-	}
-	kbdev->inited_subsys |= inited_vinstr;
-
-
-#ifdef CONFIG_MALI_DEVFREQ
-	/* Devfreq uses vinstr, so must be initialized after it. */
-	err = kbase_devfreq_init(kbdev);
-	if (!err)
-		kbdev->inited_subsys |= inited_devfreq;
-	else
-		dev_err(kbdev->dev, "Continuing without devfreq\n");
-#endif /* CONFIG_MALI_DEVFREQ */
 
 #ifdef MALI_KBASE_BUILD
 	err = kbase_debug_job_fault_dev_init(kbdev);
@@ -3945,7 +4324,7 @@
 	kbdev->inited_subsys |= inited_misc_register;
 
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#ifdef CONFIG_MALI_BUSLOG
 	err = bl_core_client_register(kbdev->devname,
 						kbase_logging_started_cb,
 						kbdev, &kbdev->buslogger,
@@ -3993,13 +4372,16 @@
 	if (!kbdev)
 		return -ENODEV;
 
+	kbase_pm_suspend(kbdev);
+
 #if defined(CONFIG_MALI_DEVFREQ) && \
 		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
-	if (kbdev->inited_subsys & inited_devfreq)
-		devfreq_suspend_device(kbdev->devfreq);
+	dev_dbg(dev, "Callback %s\n", __func__);
+	if (kbdev->inited_subsys & inited_devfreq) {
+		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND);
+		flush_workqueue(kbdev->devfreq_queue.workq);
+	}
 #endif
-
-	kbase_pm_suspend(kbdev);
 	return 0;
 }
 
@@ -4023,8 +4405,14 @@
 
 #if defined(CONFIG_MALI_DEVFREQ) && \
 		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
-	if (kbdev->inited_subsys & inited_devfreq)
-		devfreq_resume_device(kbdev->devfreq);
+	dev_dbg(dev, "Callback %s\n", __func__);
+	if (kbdev->inited_subsys & inited_devfreq) {
+		mutex_lock(&kbdev->pm.lock);
+		if (kbdev->pm.active_count > 0)
+			kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
+		mutex_unlock(&kbdev->pm.lock);
+		flush_workqueue(kbdev->devfreq_queue.workq);
+	}
 #endif
 	return 0;
 }
@@ -4048,10 +4436,11 @@
 	if (!kbdev)
 		return -ENODEV;
 
+	dev_dbg(dev, "Callback %s\n", __func__);
 #if defined(CONFIG_MALI_DEVFREQ) && \
 		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
 	if (kbdev->inited_subsys & inited_devfreq)
-		devfreq_suspend_device(kbdev->devfreq);
+		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_SUSPEND);
 #endif
 
 	if (kbdev->pm.backend.callback_power_runtime_off) {
@@ -4081,6 +4470,7 @@
 	if (!kbdev)
 		return -ENODEV;
 
+	dev_dbg(dev, "Callback %s\n", __func__);
 	if (kbdev->pm.backend.callback_power_runtime_on) {
 		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
 		dev_dbg(dev, "runtime resume\n");
@@ -4089,7 +4479,7 @@
 #if defined(CONFIG_MALI_DEVFREQ) && \
 		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
 	if (kbdev->inited_subsys & inited_devfreq)
-		devfreq_resume_device(kbdev->devfreq);
+		kbase_devfreq_enqueue_work(kbdev, DEVFREQ_WORK_RESUME);
 #endif
 
 	return ret;
@@ -4115,10 +4505,15 @@
 	if (!kbdev)
 		return -ENODEV;
 
+	dev_dbg(dev, "Callback %s\n", __func__);
 	/* Use platform specific implementation if it exists. */
 	if (kbdev->pm.backend.callback_power_runtime_idle)
 		return kbdev->pm.backend.callback_power_runtime_idle(kbdev);
 
+	/* Just need to update the device's last busy mark. The kernel will
+	 * respect the autosuspend delay and so won't suspend the device
+	 * immediately.
+	 */
+	pm_runtime_mark_last_busy(kbdev->dev);
 	return 0;
 }
 #endif /* KBASE_PM_RUNTIME */
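
pm_runtime_mark_last_busy() above only has an effect when autosuspend is enabled for the device, which is assumed to be done by platform or probe code outside this hunk. A minimal sketch of the standard runtime-PM setup that pairs with it (the 500 ms delay is an arbitrary example value):

#include <linux/device.h>
#include <linux/pm_runtime.h>

/* Assumed platform glue, not part of this patch: enable autosuspend so that
 * the last-busy timestamp updated in the runtime_idle callback above is
 * honoured and the GPU is only suspended after the configured delay. */
static void example_enable_autosuspend(struct device *dev)
{
	pm_runtime_set_autosuspend_delay(dev, 500);	/* arbitrary 500 ms */
	pm_runtime_use_autosuspend(dev);
	pm_runtime_enable(dev);
}
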
@@ -4137,24 +4532,8 @@
 
 #ifdef CONFIG_OF
 static const struct of_device_id kbase_dt_ids[] = {
-	/* DOWNSTREAM ONLY--DO NOT USE */
 	{ .compatible = "arm,malit6xx" },
 	{ .compatible = "arm,mali-midgard" },
-
-	/* From upstream bindings */
-	{ .compatible = "arm,mali-t604" },
-	{ .compatible = "arm,mali-t624" },
-	{ .compatible = "arm,mali-t628" },
-	{ .compatible = "arm,mali-t720" },
-	{ .compatible = "arm,mali-t760" },
-	{ .compatible = "arm,mali-t820" },
-	{ .compatible = "arm,mali-t830" },
-	{ .compatible = "arm,mali-t860" },
-	{ .compatible = "arm,mali-t880" },
-
-	/* Upstream bindings for Bifrost */
-	{ .compatible = "arm,mali-bifrost" },
-
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, kbase_dt_ids);
@@ -4221,51 +4600,30 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
-EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
 
-void kbase_trace_mali_pm_status(u32 event, u64 value)
+void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value)
 {
-	trace_mali_pm_status(event, value);
+	trace_mali_pm_status(dev_id, event, value);
 }
 
-void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id)
 {
-	trace_mali_pm_power_off(event, value);
+	trace_mali_job_slots_event(dev_id, event,
+		(kctx != NULL ? kctx->tgid : 0),
+		(kctx != NULL ? kctx->pid : 0),
+		atom_id);
 }
 
-void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value)
 {
-	trace_mali_pm_power_on(event, value);
+	trace_mali_page_fault_insert_pages(dev_id, event, value);
 }
 
-void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event)
 {
-	trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
-}
-
-void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
-{
-	trace_mali_page_fault_insert_pages(event, value);
-}
-
-void kbase_trace_mali_mmu_as_in_use(int event)
-{
-	trace_mali_mmu_as_in_use(event);
-}
-
-void kbase_trace_mali_mmu_as_released(int event)
-{
-	trace_mali_mmu_as_released(event);
-}
-
-void kbase_trace_mali_total_alloc_pages_change(long long int event)
-{
-	trace_mali_total_alloc_pages_change(event);
+	trace_mali_total_alloc_pages_change(dev_id, event);
 }
 #endif /* CONFIG_MALI_GATOR_SUPPORT */
 #ifdef CONFIG_MALI_SYSTEM_TRACE
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
index d2c09d6..dbc774d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,30 @@
 	return ret;
 }
 
+static void kbase_ctx_remove_pending_event(struct kbase_context *kctx)
+{
+	struct list_head *event_list = &kctx->kbdev->job_fault_event_list;
+	struct base_job_fault_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kctx->kbdev->job_fault_event_lock, flags);
+	list_for_each_entry(event, event_list, head) {
+		if (event->katom->kctx == kctx) {
+			list_del(&event->head);
+			spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags);
+
+			wake_up(&kctx->kbdev->job_fault_resume_wq);
+			flush_work(&event->job_fault_work);
+
+			/* job_fault_event_list can only have a single atom for
+			 * each context.
+			 */
+			return;
+		}
+	}
+	spin_unlock_irqrestore(&kctx->kbdev->job_fault_event_lock, flags);
+}
+
 static bool kbase_ctx_has_no_event_pending(struct kbase_context *kctx)
 {
 	struct kbase_device *kbdev = kctx->kbdev;
@@ -62,6 +86,25 @@
 	return true;
 }
 
+static int wait_for_job_fault(struct kbase_device *kbdev)
+{
+#if KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE && \
+	KERNEL_VERSION(4, 15, 0) > LINUX_VERSION_CODE
+	int ret = wait_event_interruptible_timeout(kbdev->job_fault_wq,
+			kbase_is_job_fault_event_pending(kbdev),
+			msecs_to_jiffies(2000));
+	if (ret == 0)
+		return -EAGAIN;
+	else if (ret > 0)
+		return 0;
+	else
+		return ret;
+#else
+	return wait_event_interruptible(kbdev->job_fault_wq,
+			kbase_is_job_fault_event_pending(kbdev));
+#endif
+}
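
On kernels in the [4.7, 4.15) range the wait above is bounded to two seconds and a timeout surfaces as -EAGAIN, which is expected to propagate to the reader of the job_fault debugfs file; a dump client should therefore simply retry. A hypothetical userspace-side sketch of that retry, for illustration only:

#include <errno.h>
#include <unistd.h>

/* Hypothetical dump-client helper (not part of the patch): keep re-issuing
 * the read while the kernel reports a harmless timeout or interruption. */
static ssize_t example_read_job_fault(int fd, char *buf, size_t len)
{
	ssize_t n;

	do {
		n = read(fd, buf, len);
	} while (n < 0 && (errno == EAGAIN || errno == EINTR));

	return n;
}
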
+
 /* wait until the fault happen and copy the event */
 static int kbase_job_fault_event_wait(struct kbase_device *kbdev,
 		struct base_job_fault_event *event)
@@ -71,11 +114,15 @@
 	unsigned long               flags;
 
 	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
-	if (list_empty(event_list)) {
+	while (list_empty(event_list)) {
+		int err;
+
 		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
-		if (wait_event_interruptible(kbdev->job_fault_wq,
-				 kbase_is_job_fault_event_pending(kbdev)))
-			return -ERESTARTSYS;
+
+		err = wait_for_job_fault(kbdev);
+		if (err)
+			return err;
+
 		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
 	}
 
@@ -122,24 +169,6 @@
 
 }
 
-/* Remove all the failed atoms that belong to different contexts
- * Resume all the contexts that were suspend due to failed job
- */
-static void kbase_job_fault_event_cleanup(struct kbase_device *kbdev)
-{
-	struct list_head *event_list = &kbdev->job_fault_event_list;
-	unsigned long    flags;
-
-	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
-	while (!list_empty(event_list)) {
-		kbase_job_fault_event_dequeue(kbdev, event_list);
-		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
-		wake_up(&kbdev->job_fault_resume_wq);
-		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
-	}
-	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
-}
-
 static void kbase_job_fault_resume_worker(struct work_struct *data)
 {
 	struct base_job_fault_event *event = container_of(data,
@@ -237,7 +266,10 @@
 		return true;
 	}
 
-	if (kctx->kbdev->job_fault_debug == true) {
+	if (kbase_ctx_flag(kctx, KCTX_DYING))
+		return false;
+
+	if (atomic_read(&kctx->kbdev->job_fault_debug) > 0) {
 
 		if (completion_code != BASE_JD_EVENT_DONE) {
 
@@ -337,7 +369,7 @@
 		 * job done but we delayed it. Now we should clean cache
 		 * earlier. Then the GPU memory dump should be correct.
 		 */
-		kbase_backend_cacheclean(kbdev, event->katom);
+		kbase_backend_cache_clean(kbdev, event->katom);
 	} else
 		return NULL;
 
@@ -383,12 +415,16 @@
 {
 	struct kbase_device *kbdev = in->i_private;
 
+	if (atomic_cmpxchg(&kbdev->job_fault_debug, 0, 1) == 1) {
+		dev_warn(kbdev->dev, "debug job fault is busy, only a single client is allowed");
+		return -EBUSY;
+	}
+
 	seq_open(file, &ops);
 
 	((struct seq_file *)file->private_data)->private = kbdev;
 	dev_info(kbdev->dev, "debug job fault seq open");
 
-	kbdev->job_fault_debug = true;
 
 	return 0;
 
@@ -397,15 +433,35 @@
 static int debug_job_fault_release(struct inode *in, struct file *file)
 {
 	struct kbase_device *kbdev = in->i_private;
+	struct list_head *event_list = &kbdev->job_fault_event_list;
+	unsigned long    flags;
 
 	seq_release(in, file);
 
-	kbdev->job_fault_debug = false;
+	spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+
+	/* Disable job fault dumping. This will let kbase run jobs as normal,
+	 * without blocking waiting for a job_fault client to read failed jobs.
+	 *
+	 * After this a new client may open the file, and may re-enable job
+	 * fault dumping, but the job_fault_event_lock we hold here will block
+	 * that from interfering until after we've completed the cleanup.
+	 */
+	atomic_dec(&kbdev->job_fault_debug);
 
 	/* Clean the unprocessed job fault. After that, all the suspended
-	 * contexts could be rescheduled.
+	 * contexts could be rescheduled. Remove all the failed atoms that
+	 * belong to different contexts and resume all the contexts that were
+	 * suspended due to a failed job.
 	 */
-	kbase_job_fault_event_cleanup(kbdev);
+	while (!list_empty(event_list)) {
+		kbase_job_fault_event_dequeue(kbdev, event_list);
+		spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
+		wake_up(&kbdev->job_fault_resume_wq);
+		spin_lock_irqsave(&kbdev->job_fault_event_lock, flags);
+	}
+
+	spin_unlock_irqrestore(&kbdev->job_fault_event_lock, flags);
 
 	dev_info(kbdev->dev, "debug job fault seq close");
 
@@ -413,6 +469,7 @@
 }
 
 static const struct file_operations kbasep_debug_job_fault_fops = {
+	.owner = THIS_MODULE,
 	.open = debug_job_fault_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -424,7 +481,7 @@
  */
 void kbase_debug_job_fault_debugfs_init(struct kbase_device *kbdev)
 {
-	debugfs_create_file("job_fault", S_IRUGO,
+	debugfs_create_file("job_fault", 0400,
 			kbdev->mali_debugfs_directory, kbdev,
 			&kbasep_debug_job_fault_fops);
 }
@@ -444,7 +501,7 @@
 	if (!kbdev->job_fault_resume_workq)
 		return -ENOMEM;
 
-	kbdev->job_fault_debug = false;
+	atomic_set(&kbdev->job_fault_debug, 0);
 
 	return 0;
 }
@@ -488,12 +545,17 @@
 	vfree(kctx->reg_dump);
 }
 
+void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx)
+{
+	WARN_ON(!kbase_ctx_flag(kctx, KCTX_DYING));
+
+	kbase_ctx_remove_pending_event(kctx);
+}
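
For context, the new unblock helper is assumed to be called from the context-termination path once the context has been flagged as dying; the WARN_ON above documents that expectation. A hypothetical call-site sketch:

/* Hypothetical call-site sketch (the real termination path is outside this
 * file): mark the context as dying first, then release any atom that is
 * stuck waiting for a job-fault dump so the wait for zero jobs cannot
 * time out. */
static void example_context_termination(struct kbase_context *kctx)
{
	kbase_ctx_flag_set(kctx, KCTX_DYING);
	kbase_debug_job_fault_kctx_unblock(kctx);
	/* ... wait for the context's job count to reach zero, then free ... */
}
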
+
 #else /* CONFIG_DEBUG_FS */
 
 int kbase_debug_job_fault_dev_init(struct kbase_device *kbdev)
 {
-	kbdev->job_fault_debug = false;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
index f5ab0a4..ef69627 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_job_fault.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -65,6 +65,21 @@
 void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
 
 /**
+ * kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault
+ *					dumping on context termination.
+ *
+ * This function is called during context termination to unblock the atom for
+ * which the job fault occurred, and also the atoms following it. Without
+ * this, the wait for zero jobs could time out (leading to an assertion
+ * failure, or a kernel panic in debug builds) in the pathological case where
+ * the thread/daemon capturing the job fault events is still running but has,
+ * for some reason, stopped consuming them.
+ *
+ * @kctx: KBase context pointer
+ */
+void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx);
+
+/**
  * kbase_debug_job_fault_process - Process the failed job.
  *      It will send a event and wake up the job fault waiting queue
  *      Then create a work queue to wait for job dump finish
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
index ee45529..c091f16 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,10 @@
 
 #ifdef CONFIG_DEBUG_FS
 
+#if (KERNEL_VERSION(4, 1, 0) > LINUX_VERSION_CODE)
+#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
+#endif
+
 struct debug_mem_mapping {
 	struct list_head node;
 
@@ -194,14 +198,16 @@
 
 static int debug_mem_open(struct inode *i, struct file *file)
 {
-	struct file *kctx_file = i->i_private;
-	struct kbase_context *kctx = kctx_file->private_data;
+	struct kbase_context *const kctx = i->i_private;
 	struct debug_mem_data *mem_data;
 	int ret;
 
+	if (get_file_rcu(kctx->filp) == 0)
+		return -ENOENT;
+
 	ret = seq_open(file, &ops);
 	if (ret)
-		return ret;
+		goto open_fail;
 
 	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
 	if (!mem_data) {
@@ -213,8 +219,6 @@
 
 	INIT_LIST_HEAD(&mem_data->mapping_list);
 
-	get_file(kctx_file);
-
 	kbase_gpu_vm_lock(kctx);
 
 	ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
@@ -246,16 +250,18 @@
 			list_del(&mapping->node);
 			kfree(mapping);
 		}
-		fput(kctx_file);
 		kfree(mem_data);
 	}
 	seq_release(i, file);
+open_fail:
+	fput(kctx->filp);
+
 	return ret;
 }
 
 static int debug_mem_release(struct inode *inode, struct file *file)
 {
-	struct file *kctx_file = inode->i_private;
+	struct kbase_context *const kctx = inode->i_private;
 	struct seq_file *sfile = file->private_data;
 	struct debug_mem_data *mem_data = sfile->private;
 	struct debug_mem_mapping *mapping;
@@ -272,33 +278,29 @@
 
 	kfree(mem_data);
 
-	fput(kctx_file);
+	fput(kctx->filp);
 
 	return 0;
 }
 
 static const struct file_operations kbase_debug_mem_view_fops = {
+	.owner = THIS_MODULE,
 	.open = debug_mem_open,
 	.release = debug_mem_release,
 	.read = seq_read,
 	.llseek = seq_lseek
 };
 
-/**
- * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
- * @kctx_file: The /dev/mali0 file instance for the context
- *
- * This function creates a "mem_view" file which can be used to get a view of
- * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
- *
- * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
- * parent directory.
- */
-void kbase_debug_mem_view_init(struct file *kctx_file)
+void kbase_debug_mem_view_init(struct kbase_context *const kctx)
 {
-	struct kbase_context *kctx = kctx_file->private_data;
+	/* Caller already ensures this, but we keep the pattern for
+	 * maintenance safety.
+	 */
+	if (WARN_ON(!kctx) ||
+		WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
 
-	debugfs_create_file("mem_view", S_IRUSR, kctx->kctx_dentry, kctx_file,
+	debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx,
 			&kbase_debug_mem_view_fops);
 }
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
index 886ca94..b948b7c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2015, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,16 @@
 
 #include <mali_kbase.h>
 
-void kbase_debug_mem_view_init(struct file *kctx_file);
+/**
+ * kbase_debug_mem_view_init - Initialize the mem_view sysfs file
+ * @kctx: Pointer to kernel base context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct kbase_context *kctx);
 
 #endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c b/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c
new file mode 100644
index 0000000..37e507b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.c
@@ -0,0 +1,183 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "mali_kbase_debugfs_helper.h"
+
+/* Arbitrary maximum size to prevent user space from allocating too much
+ * kernel memory
+ */
+#define DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE (256u)
+
+/**
+ * set_attr_from_string - Parse a string to set elements of an array
+ *
+ * This is the core of the implementation of
+ * kbase_debugfs_helper_set_attr_from_string. The only difference between the
+ * two functions is that this one requires the input string to be writable.
+ *
+ * @buf:         Input string to parse. Must be nul-terminated!
+ * @array:       Address of an object that can be accessed like an array.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+static int set_attr_from_string(
+	char *const buf,
+	void *const array, size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn)
+{
+	size_t index;
+	int err = 0;
+	char *ptr = buf;
+
+	for (index = 0; index < nelems && *ptr; ++index) {
+		unsigned long new_size;
+		size_t len;
+		char sep;
+
+		/* Drop leading spaces */
+		while (*ptr == ' ')
+			ptr++;
+
+		len = strcspn(ptr, "\n ");
+		if (len == 0) {
+			/* No more values (allow this) */
+			break;
+		}
+
+		/* Substitute a nul terminator for a space character
+		 * to make the substring valid for kstrtoul.
+		 */
+		sep = ptr[len];
+		if (sep == ' ')
+			ptr[len++] = '\0';
+
+		err = kstrtoul(ptr, 0, &new_size);
+		if (err)
+			break;
+
+		/* Skip the substring (including any premature nul terminator)
+		 */
+		ptr += len;
+
+		set_attr_fn(array, index, new_size);
+	}
+
+	return err;
+}
+
+int kbase_debugfs_helper_set_attr_from_string(
+	const char *const buf, void *const array, size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn)
+{
+	char *const wbuf = kstrdup(buf, GFP_KERNEL);
+	int err = 0;
+
+	if (!wbuf)
+		return -ENOMEM;
+
+	err = set_attr_from_string(wbuf, array, nelems,
+		set_attr_fn);
+
+	kfree(wbuf);
+	return err;
+}
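
A small hypothetical usage example of the parser: values may be decimal or prefixed hex, parsing stops at the first break in the input, and elements beyond the supplied values are left untouched.

/* Hypothetical example, not part of the patch. */
static void example_set_elem(void *array, size_t index, size_t value)
{
	((size_t *)array)[index] = value;
}

static void example_parse(void)
{
	size_t sizes[4] = { 0, 0, 0, 0 };
	int err = kbase_debugfs_helper_set_attr_from_string(
		"256 0x40 16", sizes, ARRAY_SIZE(sizes), example_set_elem);

	/* On success err == 0 and sizes == { 256, 64, 16, 0 }: only three
	 * values were supplied, so the fourth element is left unchanged. */
	WARN_ON(err);
}
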
+
+ssize_t kbase_debugfs_helper_get_attr_to_string(
+	char *const buf, size_t const size,
+	void *const array, size_t const nelems,
+	kbase_debugfs_helper_get_attr_fn const get_attr_fn)
+{
+	ssize_t total = 0;
+	size_t index;
+
+	for (index = 0; index < nelems; ++index) {
+		const char *postfix = " ";
+
+		if (index == (nelems-1))
+			postfix = "\n";
+
+		total += scnprintf(buf + total, size - total, "%zu%s",
+				get_attr_fn(array, index), postfix);
+	}
+
+	return total;
+}
+
+int kbase_debugfs_helper_seq_write(struct file *const file,
+	const char __user *const ubuf, size_t const count,
+	size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn)
+{
+	const struct seq_file *const sfile = file->private_data;
+	void *const array = sfile->private;
+	int err = 0;
+	char *buf;
+
+	if (WARN_ON(!array))
+		return -EINVAL;
+
+	if (WARN_ON(count > DEBUGFS_MEM_POOLS_MAX_WRITE_SIZE))
+		return -EINVAL;
+
+	buf = kmalloc(count + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, ubuf, count)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	buf[count] = '\0';
+	err = set_attr_from_string(buf,
+		array, nelems, set_attr_fn);
+	kfree(buf);
+
+	return err;
+}
+
+int kbase_debugfs_helper_seq_read(struct seq_file *const sfile,
+	size_t const nelems,
+	kbase_debugfs_helper_get_attr_fn const get_attr_fn)
+{
+	void *const array = sfile->private;
+	size_t index;
+
+	if (WARN_ON(!array))
+		return -EINVAL;
+
+	for (index = 0; index < nelems; ++index) {
+		const char *postfix = " ";
+
+		if (index == (nelems-1))
+			postfix = "\n";
+
+		seq_printf(sfile, "%zu%s", get_attr_fn(array, index), postfix);
+	}
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h b/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h
new file mode 100644
index 0000000..c3c9efa
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debugfs_helper.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DEBUGFS_HELPER_H_
+#define _KBASE_DEBUGFS_HELPER_H_
+
+/**
+ * typedef kbase_debugfs_helper_set_attr_fn - Type of function to set an
+ *                                            attribute value from an array
+ *
+ * @array: Address of an object that can be accessed like an array.
+ * @index: An element index. The valid range depends on the use-case.
+ * @value: Attribute value to be set.
+ */
+typedef void (*kbase_debugfs_helper_set_attr_fn)(
+	void *array, size_t index, size_t value);
+
+/**
+ * kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an
+ *                                             array
+ *
+ * The given function is called once for each attribute value found in the
+ * input string. It is not an error if the string specifies fewer attribute
+ * values than the specified number of array elements.
+ *
+ * The number base of each attribute value is detected automatically
+ * according to the standard rules (e.g. prefix "0x" for hexadecimal).
+ * Attribute values are separated by one or more space characters.
+ * Additional leading and trailing spaces are ignored.
+ *
+ * @buf:         Input string to parse. Must be nul-terminated!
+ * @array:       Address of an object that can be accessed like an array.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_debugfs_helper_set_attr_from_string(
+	const char *buf, void *array, size_t nelems,
+	kbase_debugfs_helper_set_attr_fn set_attr_fn);
+
+/**
+ * typedef kbase_debugfs_helper_get_attr_fn - Type of function to get an
+ *                                            attribute value from an array
+ *
+ * @array: Address of an object that can be accessed like an array.
+ * @index: An element index. The valid range depends on the use-case.
+ *
+ * Return: Value of attribute.
+ */
+typedef size_t (*kbase_debugfs_helper_get_attr_fn)(
+	void *array, size_t index);
+
+/**
+ * kbase_debugfs_helper_get_attr_to_string - Construct a formatted string
+ *                                           from elements in an array
+ *
+ * The given function is called once for each array element to get the
+ * value of the attribute to be inspected. The attribute values are
+ * written to the buffer as a formatted string of decimal numbers
+ * separated by spaces and terminated by a linefeed.
+ *
+ * @buf:         Buffer in which to store the formatted output string.
+ * @size:        The size of the buffer, in bytes.
+ * @array:       Address of an object that can be accessed like an array.
+ * @nelems:      Number of elements in the array.
+ * @get_attr_fn: Function to be called back for each array element.
+ *
+ * Return: Number of characters written excluding the nul terminator.
+ */
+ssize_t kbase_debugfs_helper_get_attr_to_string(
+	char *buf, size_t size, void *array, size_t nelems,
+	kbase_debugfs_helper_get_attr_fn get_attr_fn);
+
+/**
+ * kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an
+ *                                 array
+ *
+ * The virtual file must have been opened by calling single_open and passing
+ * the address of an object that can be accessed like an array.
+ *
+ * The given function is called once for each array element to get the
+ * value of the attribute to be inspected. The attribute values are
+ * written to the buffer as a formatted string of decimal numbers
+ * separated by spaces and terminated by a linefeed.
+ *
+ * @sfile:       A virtual file previously opened by calling single_open.
+ * @nelems:      Number of elements in the array.
+ * @get_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_debugfs_helper_seq_read(
+	struct seq_file *const sfile, size_t const nelems,
+	kbase_debugfs_helper_get_attr_fn const get_attr_fn);
+
+/**
+ * kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an
+ *                                  array
+ *
+ * The virtual file must have been opened by calling single_open and passing
+ * the address of an object that can be accessed like an array.
+ *
+ * The given function is called once for each attribute value found in the
+ * data written to the virtual file. For further details, refer to the
+ * description of set_attr_from_string.
+ *
+ * @file:        A virtual file previously opened by calling single_open.
+ * @ubuf:        Source address in user space.
+ * @count:       Number of bytes written to the virtual file.
+ * @nelems:      Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_debugfs_helper_seq_write(struct file *const file,
+	const char __user *const ubuf, size_t const count,
+	size_t const nelems,
+	kbase_debugfs_helper_set_attr_fn const set_attr_fn);
+
+#endif  /* _KBASE_DEBUGFS_HELPER_H_ */
+
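
As a usage sketch (again not part of the patch), the two seq helpers are meant to slot into a debugfs file opened with single_open(). Everything prefixed example_, including EXAMPLE_NELEMS and the get/set callbacks, is assumed for illustration.

    static int example_show(struct seq_file *sfile, void *data)
    {
    	/* sfile->private was supplied to single_open() below */
    	return kbase_debugfs_helper_seq_read(sfile, EXAMPLE_NELEMS,
    			example_get_attr);
    }

    static int example_open(struct inode *in, struct file *file)
    {
    	/* Pass the array-like object as the seq_file's private data */
    	return single_open(file, example_show, in->i_private);
    }

    static ssize_t example_write(struct file *file, const char __user *ubuf,
    		size_t count, loff_t *ppos)
    {
    	int err = kbase_debugfs_helper_seq_write(file, ubuf, count,
    			EXAMPLE_NELEMS, example_set_attr);

    	return err ? err : count;
    }

    static const struct file_operations example_fops = {
    	.owner = THIS_MODULE,
    	.open = example_open,
    	.read = seq_read,
    	.write = example_write,
    	.llseek = seq_lseek,
    	.release = single_release,
    };
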
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
index bcb4132..4f1c070 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -40,16 +40,16 @@
 #include <mali_kbase_instr_defs.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_gpuprops_types.h>
+#include <mali_kbase_hwcnt_backend_gpu.h>
 #include <protected_mode_switcher.h>
 
-
 #include <linux/atomic.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/sizes.h>
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#ifdef CONFIG_MALI_BUSLOG
 #include <linux/bus_logger.h>
 #endif
 
@@ -69,6 +69,7 @@
 
 #include <linux/clk.h>
 #include <linux/regulator/consumer.h>
+#include <linux/memory_group_manager.h>
 
 #if defined(CONFIG_PM_RUNTIME) || \
 	(defined(CONFIG_PM) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 19, 0))
@@ -143,8 +144,6 @@
 #define BASE_MAX_NR_AS              16
 
 /* mmu */
-#define MIDGARD_MMU_VA_BITS 48
-
 #define MIDGARD_MMU_LEVEL(x) (x)
 
 #define MIDGARD_MMU_TOPLEVEL    MIDGARD_MMU_LEVEL(0)
@@ -166,18 +165,12 @@
 #include "mali_kbase_js_defs.h"
 #include "mali_kbase_hwaccess_defs.h"
 
-#define KBASEP_FORCE_REPLAY_DISABLED 0
-
-/* Maximum force replay limit when randomization is enabled */
-#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
-
 /* Maximum number of pages of memory that require a permanent mapping, per
  * kbase_context
  */
 #define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((1024ul * 1024ul) >> \
 								PAGE_SHIFT)
 
-
 /** Atom has been previously soft-stoppped */
 #define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
 /** Atom has been previously retried to execute */
@@ -232,6 +225,26 @@
 /* Reset the GPU after each atom completion */
 #define KBASE_SERIALIZE_RESET (1 << 2)
 
+/* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
+ * clients, to reduce undesired system load.
+ * If a virtualizer client requests a dump within this threshold period after
+ * some other client has performed a dump, a new dump won't be performed and
+ * the accumulated counter values for that client will be returned instead.
+ */
+#define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC)
+
+/* Maximum number of clock/regulator pairs that may be referenced by
+ * the device node.
+ * This is dependent on support for of_property_read_u64_array() in the
+ * kernel.
+ */
+#if (KERNEL_VERSION(4, 0, 0) <= LINUX_VERSION_CODE) || \
+			defined(LSK_OPPV2_BACKPORT)
+#define BASE_MAX_NR_CLOCKS_REGULATORS (2)
+#else
+#define BASE_MAX_NR_CLOCKS_REGULATORS (1)
+#endif
+
 /* Forward declarations */
 struct kbase_context;
 struct kbase_device;
@@ -426,8 +439,8 @@
  * @KBASE_ATOM_ENTER_PROTECTED_CHECK:  Starting state. Check if there are any atoms
  *                      currently submitted to GPU and protected mode transition is
  *                      not already in progress.
- * @KBASE_ATOM_ENTER_PROTECTED_VINSTR: Wait for vinstr to suspend before entry into
- *                      protected mode.
+ * @KBASE_ATOM_ENTER_PROTECTED_HWCNT: Wait for hardware counter context to
+ *                      become disabled before entry into protected mode.
  * @KBASE_ATOM_ENTER_PROTECTED_IDLE_L2: Wait for the L2 to become idle in preparation
  *                      for the coherency change. L2 shall be powered down and GPU shall
  *                      come out of fully coherent mode before entering protected mode.
@@ -443,7 +456,7 @@
 	 * NOTE: The integer value of this must match KBASE_ATOM_EXIT_PROTECTED_CHECK.
 	 */
 	KBASE_ATOM_ENTER_PROTECTED_CHECK = 0,
-	KBASE_ATOM_ENTER_PROTECTED_VINSTR,
+	KBASE_ATOM_ENTER_PROTECTED_HWCNT,
 	KBASE_ATOM_ENTER_PROTECTED_IDLE_L2,
 	KBASE_ATOM_ENTER_PROTECTED_SET_COHERENCY,
 	KBASE_ATOM_ENTER_PROTECTED_FINISHED,
@@ -514,8 +527,6 @@
  * @jc:                    GPU address of the job-chain.
  * @softjob_data:          Copy of data read from the user space buffer that @jc
  *                         points to.
- * @coreref_state:         state of the atom with respect to retention of shader
- *                         cores for affinity & power management.
  * @fence:                 Stores either an input or output sync fence, depending
  *                         on soft-job type
  * @sync_waiter:           Pointer to the sync fence waiter structure passed to the
@@ -545,8 +556,6 @@
  * @slot_nr:               Job slot chosen for the atom.
  * @atom_flags:            bitmask of KBASE_KATOM_FLAG* flags capturing the exact
  *                         low level state of the atom.
- * @retry_count:           Number of times this atom has been retried. Used by replay
- *                         soft job.
  * @gpu_rb_state:          bitmnask of KBASE_ATOM_GPU_RB_* flags, precisely tracking
  *                         atom's state after it has entered Job scheduler on becoming
  *                         runnable. Atom could be blocked due to cross slot dependency
@@ -608,7 +617,6 @@
 	u32 device_nr;
 	u64 jc;
 	void *softjob_data;
-	enum kbase_atom_coreref_state coreref_state;
 #if defined(CONFIG_SYNC)
 	struct sync_fence *fence;
 	struct sync_fence_waiter sync_waiter;
@@ -684,6 +692,7 @@
 	/* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
 	enum base_jd_event_code event_code;
 	base_jd_core_req core_req;
+	u8 jobslot;
 
 	u32 ticks;
 	int sched_priority;
@@ -742,6 +751,8 @@
  * @size:	size of the buffer in bytes
  * @pages:	pointer to an array of pointers to the pages which contain
  *		the buffer
+ * @is_vmalloc: True if @pages was allocated with vzalloc, false if @pages was
+ *              allocated with kcalloc.
  * @nr_pages:	number of pages
  * @offset:	offset into the pages
  * @gpu_alloc:	pointer to physical memory allocated by the GPU
@@ -752,6 +763,7 @@
 struct kbase_debug_copy_buffer {
 	size_t size;
 	struct page **pages;
+	bool is_vmalloc;
 	int nr_pages;
 	size_t offset;
 	struct kbase_mem_phy_alloc *gpu_alloc;
@@ -857,6 +869,21 @@
 };
 
 /**
+ * struct kbase_fault - object containing data relating to a page or bus fault.
+ * @addr:           Records the faulting address.
+ * @extra_addr:     Records the secondary fault address.
+ * @status:         Records the fault status as reported by Hw.
+ * @protected_mode: Flag indicating whether the fault occurred in protected mode
+ *                  or not.
+ */
+struct kbase_fault {
+	u64 addr;
+	u64 extra_addr;
+	u32 status;
+	bool protected_mode;
+};
+
+/**
  * struct kbase_as   - object representing an address space of GPU.
  * @number:            Index at which this address space structure is present
  *                     in an array of address space structures embedded inside the
@@ -865,13 +892,8 @@
  *                     and Page fault handling.
  * @work_pagefault:    Work item for the Page fault handling.
  * @work_busfault:     Work item for the Bus fault handling.
- * @fault_type:        Type of fault which occured for this address space,
- *                     regular/unexpected Bus or Page fault.
- * @protected_mode:    Flag indicating whether the fault occurred in protected
- *                     mode or not.
- * @fault_status:      Records the fault status as reported by Hw.
- * @fault_addr:        Records the faulting address.
- * @fault_extra_addr:  Records the secondary fault address.
+ * @pf_data:           Data relating to page fault.
+ * @bf_data:           Data relating to bus fault.
  * @current_setup:     Stores the MMU configuration for this address space.
  * @poke_wq:           Workqueue to process the work items queue for poking the
  *                     MMU as a WA for BASE_HW_ISSUE_8316.
@@ -888,11 +910,8 @@
 	struct workqueue_struct *pf_wq;
 	struct work_struct work_pagefault;
 	struct work_struct work_busfault;
-	enum kbase_mmu_fault_type fault_type;
-	bool protected_mode;
-	u32 fault_status;
-	u64 fault_addr;
-	u64 fault_extra_addr;
+	struct kbase_fault pf_data;
+	struct kbase_fault bf_data;
 	struct kbase_mmu_setup current_setup;
 	struct workqueue_struct *poke_wq;
 	struct work_struct poke_work;
@@ -912,6 +931,9 @@
  *                        level page table of the context, this is used for
  *                        MMU HW programming as the address translation will
  *                        start from the top level page table.
+ * @group_id:             A memory group ID to be passed to a platform-specific
+ *                        memory group manager.
+ *                        Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
  * @kctx:                 If this set of MMU tables belongs to a context then
  *                        this is a back-reference to the context, otherwise
  *                        it is NULL
@@ -920,17 +942,20 @@
 	u64 *mmu_teardown_pages;
 	struct mutex mmu_lock;
 	phys_addr_t pgd;
+	u8 group_id;
 	struct kbase_context *kctx;
 };
 
-static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+static inline int kbase_as_has_bus_fault(struct kbase_as *as,
+	struct kbase_fault *fault)
 {
-	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+	return (fault == &as->bf_data);
 }
 
-static inline int kbase_as_has_page_fault(struct kbase_as *as)
+static inline int kbase_as_has_page_fault(struct kbase_as *as,
+	struct kbase_fault *fault)
 {
-	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+	return (fault == &as->pf_data);
 }
 
 struct kbasep_mem_device {
@@ -1003,11 +1028,6 @@
 	u8 flags;
 };
 
-struct kbasep_kctx_list_element {
-	struct list_head link;
-	struct kbase_context *kctx;
-};
-
 /**
  * Data stored per device for power management.
  *
@@ -1061,15 +1081,6 @@
 	/* Time in milliseconds between each dvfs sample */
 	u32 dvfs_period;
 
-	/* Period of GPU poweroff timer */
-	ktime_t gpu_poweroff_time;
-
-	/* Number of ticks of GPU poweroff timer before shader is powered off */
-	int poweroff_shader_ticks;
-
-	/* Number of ticks of GPU poweroff timer before GPU is powered off */
-	int poweroff_gpu_ticks;
-
 	struct kbase_pm_backend_data backend;
 };
 
@@ -1081,6 +1092,9 @@
  * @max_size:     Maximum number of free pages in the pool
  * @order:        order = 0 refers to a pool of 4 KB pages
  *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @group_id:     A memory group ID to be passed to a platform-specific
+ *                memory group manager, if present. Immutable.
+ *                Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
  * @pool_lock:    Lock protecting the pool - must be held when modifying
  *                @cur_size and @page_list
  * @page_list:    List of free pages in the pool
@@ -1097,7 +1111,8 @@
 	struct kbase_device *kbdev;
 	size_t              cur_size;
 	size_t              max_size;
-	size_t		    order;
+	u8                  order;
+	u8                  group_id;
 	spinlock_t          pool_lock;
 	struct list_head    page_list;
 	struct shrinker     reclaim;
@@ -1109,16 +1124,62 @@
 };
 
 /**
+ * struct kbase_mem_pool_group - a complete set of physical memory pools.
+ *
+ * Memory pools are used to allow efficient reallocation of previously-freed
+ * physical pages. A pair of memory pools is initialized for each physical
+ * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays
+ * should be indexed by physical memory group ID, the meaning of which is
+ * defined by the systems integrator.
+ *
+ * @small: Array of objects containing the state for pools of 4 KiB size
+ *         physical pages.
+ * @large: Array of objects containing the state for pools of 2 MiB size
+ *         physical pages.
+ */
+struct kbase_mem_pool_group {
+	struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
+	struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS];
+};
+
+/**
+ * struct kbase_mem_pool_config - Initial configuration for a physical memory
+ *                                pool
+ *
+ * @max_size: Maximum number of free pages that the pool can hold.
+ */
+struct kbase_mem_pool_config {
+	size_t max_size;
+};
+
+/**
+ * struct kbase_mem_pool_group_config - Initial configuration for a complete
+ *                                      set of physical memory pools
+ *
+ * This array should be indexed by physical memory group ID, the meaning
+ * of which is defined by the systems integrator.
+ *
+ * @small: Array of initial configuration for pools of 4 KiB pages.
+ * @large: Array of initial configuration for pools of 2 MiB pages.
+ */
+struct kbase_mem_pool_group_config {
+	struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
+	struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS];
+};
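
A minimal sketch of how such a configuration might be filled in, assuming one common maximum size for every memory group. The helper name is illustrative; the real initialisation lives in mali_kbase_mem_pool_group.c, which is not shown here.

    static void example_mem_pool_group_config_init(
    		struct kbase_mem_pool_group_config *const configs,
    		size_t max_size)
    {
    	int gid;

    	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; gid++) {
    		/* Same limit for the 4 KiB and 2 MiB pools of each group */
    		configs->small[gid].max_size = max_size;
    		configs->large[gid].max_size = max_size;
    	}
    }
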
+
+/**
  * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
- *                            frequency, and real frequency and core mask
+ *                            frequency, real frequencies and core mask
+ * @real_freqs: Real GPU frequencies.
+ * @opp_volts: OPP voltages.
  * @opp_freq:  Nominal OPP frequency
- * @real_freq: Real GPU frequency
  * @core_mask: Shader core mask
  */
 struct kbase_devfreq_opp {
 	u64 opp_freq;
-	u64 real_freq;
 	u64 core_mask;
+	u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
+	u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
 };
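
Since an OPP now carries one real frequency and one voltage per clock/regulator pair, a devfreq target callback has to walk both arrays. The sketch below is an illustration under assumptions (error handling and the usual ordering of voltage versus frequency changes are omitted); the field names come from the structures in this patch.

    static int example_apply_opp(struct kbase_device *kbdev,
    		const struct kbase_devfreq_opp *opp)
    {
    	unsigned int i;

    	for (i = 0; i < kbdev->nr_clocks; i++)
    		if (clk_set_rate(kbdev->clocks[i], opp->real_freqs[i]))
    			return -EINVAL;

    #ifdef CONFIG_REGULATOR
    	for (i = 0; i < kbdev->nr_regulators; i++)
    		if (regulator_set_voltage(kbdev->regulators[i],
    				opp->opp_volts[i], opp->opp_volts[i]))
    			return -EINVAL;
    #endif
    	return 0;
    }
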
 
 /* MMU mode flags */
@@ -1151,10 +1212,10 @@
 			struct kbase_mmu_setup * const setup);
 	void (*disable_as)(struct kbase_device *kbdev, int as_nr);
 	phys_addr_t (*pte_to_phy_addr)(u64 entry);
-	int (*ate_is_valid)(u64 ate, unsigned int level);
-	int (*pte_is_valid)(u64 pte, unsigned int level);
+	int (*ate_is_valid)(u64 ate, int level);
+	int (*pte_is_valid)(u64 pte, int level);
 	void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
-			unsigned long flags, unsigned int level);
+			unsigned long flags, int level);
 	void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
 	void (*entry_invalidate)(u64 *entry);
 	unsigned long flags;
@@ -1164,11 +1225,38 @@
 struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
 
 
-#define DEVNAME_SIZE	16
-#if defined(CONFIG_REGULATOR)
-#define KBASE_MAX_REGULATORS	2
-#endif
 
+#define DEVNAME_SIZE	16
+
+/**
+ * enum kbase_devfreq_work_type - The type of work to perform in the devfreq
+ *                                suspend/resume worker.
+ * @DEVFREQ_WORK_NONE:    Initialisation state.
+ * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device().
+ * @DEVFREQ_WORK_RESUME:  Call devfreq_resume_device().
+ */
+enum kbase_devfreq_work_type {
+	DEVFREQ_WORK_NONE,
+	DEVFREQ_WORK_SUSPEND,
+	DEVFREQ_WORK_RESUME
+};
+
+/**
+ * struct kbase_devfreq_queue_info - Object representing an instance for managing
+ *                                   the queued devfreq suspend/resume works.
+ * @workq:                 Workqueue for devfreq suspend/resume requests
+ * @work:                  Work item for devfreq suspend & resume
+ * @req_type:              Requested work type to be performed by the devfreq
+ *                         suspend/resume worker
+ * @acted_type:            Work type that has been acted on by the worker, i.e.
+ *                         the internally recorded state of the suspend/resume
+ */
+struct kbase_devfreq_queue_info {
+	struct workqueue_struct *workq;
+	struct work_struct work;
+	enum kbase_devfreq_work_type req_type;
+	enum kbase_devfreq_work_type acted_type;
+};
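
The intended pattern, sketched here under assumptions (the function names are illustrative), is that requesters only record the desired work type and queue the work item, while the worker compares the requested type against the last acted-on type before calling devfreq_suspend_device()/devfreq_resume_device().

    static void example_devfreq_enqueue(struct kbase_device *kbdev,
    		enum kbase_devfreq_work_type type)
    {
    	kbdev->devfreq_queue.req_type = type;
    	queue_work(kbdev->devfreq_queue.workq, &kbdev->devfreq_queue.work);
    }

    static void example_devfreq_worker(struct work_struct *work)
    {
    	struct kbase_device *kbdev = container_of(work, struct kbase_device,
    			devfreq_queue.work);
    	enum kbase_devfreq_work_type type = kbdev->devfreq_queue.req_type;

    	if (type == kbdev->devfreq_queue.acted_type)
    		return;	/* Nothing new requested since the last run */

    	if (type == DEVFREQ_WORK_SUSPEND)
    		devfreq_suspend_device(kbdev->devfreq);
    	else if (type == DEVFREQ_WORK_RESUME)
    		devfreq_resume_device(kbdev->devfreq);

    	kbdev->devfreq_queue.acted_type = type;
    }
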
 
 /**
  * struct kbase_device   - Object representing an instance of GPU platform device,
@@ -1197,12 +1285,19 @@
  * @irqs:                  Array containing IRQ resource info for 3 types of
  *                         interrupts : Job scheduling, MMU & GPU events (like
  *                         power management, cache etc.)
- * @clock:                 Pointer to the input clock resource (having an id of 0),
- *                         referenced by the GPU device node.
- * @regulator:             Pointer to the struct corresponding to the regulator
- *                         for GPU device
+ * @clocks:                Pointer to the input clock resources referenced by
+ *                         the GPU device node.
+ * @nr_clocks:             Number of clocks set in the clocks array.
+ * @regulators:            Pointer to the structs corresponding to the
+ *                         regulators referenced by the GPU device node.
+ * @nr_regulators:         Number of regulators set in the regulators array.
+ * @opp_table:             Pointer to the device OPP structure maintaining the
+ *                         link to OPPs attached to a device. This is obtained
+ *                         after setting regulator names for the device.
  * @devname:               string containing the name used for GPU device instance,
  *                         miscellaneous device is registered using the same name.
+ * @id:                    Unique identifier for the device, indicates the number of
+ *                         devices which have been created so far.
  * @model:                 Pointer, valid only when Driver is compiled to not access
  *                         the real GPU Hw, to the dummy model which tries to mimic
  *                         to some extent the state & behavior of GPU Hw in response
@@ -1227,14 +1322,16 @@
  *                         Job Scheduler, which is global to the device and is not
  *                         tied to any particular struct kbase_context running on
  *                         the device
- * @mem_pool:              Object containing the state for global pool of 4KB size
- *                         physical pages which can be used by all the contexts.
- * @lp_mem_pool:           Object containing the state for global pool of 2MB size
- *                         physical pages which can be used by all the contexts.
+ * @mem_pools:             Global pools of free physical memory pages which can
+ *                         be used by all the contexts.
  * @memdev:                keeps track of the in use physical pages allocated by
  *                         the Driver.
  * @mmu_mode:              Pointer to the object containing methods for programming
  *                         the MMU, depending on the type of MMU supported by Hw.
+ * @mgm_dev:               Pointer to the memory group manager device attached
+ *                         to the GPU device. This points to an internal memory
+ *                         group manager if no platform-specific memory group
+ *                         manager was retrieved through device tree.
  * @as:                    Array of objects representing address spaces of GPU.
  * @as_free:               Bitpattern of free/available GPU address spaces.
  * @as_to_kctx:            Array of pointers to struct kbase_context, having
@@ -1245,35 +1342,23 @@
  *                         configuration/properties of GPU HW device in use.
  * @hw_issues_mask:        List of SW workarounds for HW issues
  * @hw_features_mask:      List of available HW features.
- * @shader_needed_cnt:     Count for the 64 shader cores, incremented when
- *                         shaders are requested for use and decremented later
- *                         when they are no longer required.
- * @tiler_needed_cnt:      Count for the Tiler block shader cores, incremented
- *                         when Tiler is requested for use and decremented
- *                         later when the Tiler is no longer required.
  * @disjoint_event:        struct for keeping track of the disjoint information,
  *                         that whether the GPU is in a disjoint state and the
  *                         number of disjoint events that have occurred on GPU.
- * @l2_users_count:        Refcount for tracking users of the l2 cache, e.g.
- *                         when using hardware counter instrumentation.
- * @shader_available_bitmap: Bitmap of shader cores that are currently available,
- *                         powered up and the power policy is happy for jobs
- *                         to be submitted to these cores. These are updated
- *                         by the power management code. The job scheduler
- *                         should avoid submitting new jobs to any cores
- *                         that are not marked as available.
- * @tiler_available_bitmap: Bitmap of tiler units that are currently available.
- * @l2_available_bitmap:    Bitmap of the currently available Level 2 caches.
- * @stack_available_bitmap: Bitmap of the currently available Core stacks.
- * @shader_ready_bitmap:    Bitmap of shader cores that are ready (powered on)
- * @shader_transitioning_bitmap: Bitmap of shader cores that are currently changing
- *                         power state.
  * @nr_hw_address_spaces:  Number of address spaces actually available in the
  *                         GPU, remains constant after driver initialisation.
  * @nr_user_address_spaces: Number of address spaces available to user contexts
  * @hwcnt:                  Structure used for instrumentation and HW counters
  *                         dumping
- * @vinstr_ctx:            vinstr context created per device
+ * @hwcnt_gpu_iface:       Backend interface for GPU hardware counter access.
+ * @hwcnt_gpu_ctx:         Context for GPU hardware counter access.
+ *                         @hwaccess_lock must be held when calling
+ *                         kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
+ * @hwcnt_gpu_virt:        Virtualizer for GPU hardware counters.
+ * @vinstr_ctx:            vinstr context created per device.
+ * @timeline_is_enabled:   Non-zero if there is at least one timeline client,
+ *                         zero otherwise.
+ * @timeline:              Timeline context created per device.
  * @trace_lock:            Lock to serialize the access to trace buffer.
  * @trace_first_out:       Index/offset in the trace buffer at which the first
  *                         unread message is present.
@@ -1285,38 +1370,50 @@
  * @reset_timeout_ms:      Number of milliseconds to wait for the soft stop to
  *                         complete for the GPU jobs before proceeding with the
  *                         GPU reset.
- * @cacheclean_lock:       Lock to serialize the clean & invalidation of GPU caches,
- *                         between Job Manager backend & Instrumentation code.
+ * @cache_clean_in_progress: Set when a cache clean has been started, and
+ *                         cleared when it has finished. This prevents multiple
+ *                         cache cleans being done simultaneously.
+ * @cache_clean_queued:    Set if a cache clean is invoked while another is in
+ *                         progress. If this happens, another cache clean needs
+ *                         to be triggered immediately after completion of the
+ *                         current one.
+ * @cache_clean_wait:      Signalled when a cache clean has finished.
  * @platform_context:      Platform specific private data to be accessed by
  *                         platform specific config files only.
- * @kctx_list:             List of kbase_contexts created for the device, including
- *                         the kbase_context created for vinstr_ctx.
+ * @kctx_list:             List of kbase_contexts created for the device,
+ *                         including any contexts that might be created for
+ *                         hardware counters.
  * @kctx_list_lock:        Lock protecting concurrent accesses to @kctx_list.
  * @devfreq_profile:       Describes devfreq profile for the Mali GPU device, passed
  *                         to devfreq_add_device() to add devfreq feature to Mali
  *                         GPU device.
  * @devfreq:               Pointer to devfreq structure for Mali GPU device,
  *                         returned on the call to devfreq_add_device().
- * @current_freq:          The real frequency, corresponding to @current_nominal_freq,
- *                         at which the Mali GPU device is currently operating, as
- *                         retrieved from @opp_table in the target callback of
+ * @current_freqs:         The real frequencies, corresponding to
+ *                         @current_nominal_freq, at which the Mali GPU device
+ *                         is currently operating, as retrieved from
+ *                         @devfreq_table in the target callback of
  *                         @devfreq_profile.
  * @current_nominal_freq:  The nominal frequency currently used for the Mali GPU
  *                         device as retrieved through devfreq_recommended_opp()
  *                         using the freq value passed as an argument to target
  *                         callback of @devfreq_profile
- * @current_voltage:       The voltage corresponding to @current_nominal_freq, as
- *                         retrieved through dev_pm_opp_get_voltage().
+ * @current_voltages:      The voltages corresponding to @current_nominal_freq,
+ *                         as retrieved from @devfreq_table in the target
+ *                         callback of @devfreq_profile.
  * @current_core_mask:     bitmask of shader cores that are currently desired &
  *                         enabled, corresponding to @current_nominal_freq as
- *                         retrieved from @opp_table in the target callback of
- *                         @devfreq_profile.
- * @opp_table:             Pointer to the lookup table for converting between nominal
- *                         OPP (operating performance point) frequency, and real
- *                         frequency and core mask. This table is constructed according
- *                         to operating-points-v2-mali table in devicetree.
+ *                         retrieved from @devfreq_table in the target callback
+ *                         of @devfreq_profile.
+ * @devfreq_table:         Pointer to the lookup table for converting between
+ *                         nominal OPP (operating performance point) frequency,
+ *                         and real frequency and core mask. This table is
+ *                         constructed according to operating-points-v2-mali
+ *                         table in devicetree.
  * @num_opps:              Number of operating performance points available for the Mali
  *                         GPU device.
+ * @devfreq_queue:         Per-device object storing the data that manages the
+ *                         devfreq suspend & resume request queue and related items.
  * @devfreq_cooling:       Pointer returned on registering devfreq cooling device
  *                         corresponding to @devfreq.
  * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected
@@ -1325,7 +1422,9 @@
  *                         previously entered protected mode.
  * @ipa:                   Top level structure for IPA, containing pointers to both
  *                         configured & fallback models.
- * @timeline:              Stores the global timeline tracking information.
+ * @previous_frequency:    Previous frequency of the GPU clock, used for the
+ *                         BASE_HW_ISSUE_GPU2017_1336 workaround. This frequency
+ *                         is restored when the L2 is powered on.
  * @job_fault_debug:       Flag to control the dumping of debug data for job faults,
  *                         set when the 'job_fault' debugfs file is opened.
  * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
@@ -1347,20 +1446,6 @@
  * @job_fault_event_lock:  Lock to protect concurrent accesses to @job_fault_event_list
  * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
  *                         file "read_register".
- * @kbase_profiling_controls: Profiling controls set by gator to control frame buffer
- *                         dumping and s/w counter reporting.
- * @force_replay_limit:    Number of gpu jobs, having replay atoms associated with them,
- *                         that are run before a job is forced to fail and replay.
- *                         Set to 0 to disable forced failures.
- * @force_replay_count:    Count of gpu jobs, having replay atoms associated with them,
- *                         between forced failures. Incremented on each gpu job which
- *                         has replay atoms dependent on it. A gpu job is forced to
- *                         fail once this is greater than or equal to @force_replay_limit
- * @force_replay_core_req: Core requirements, set through the sysfs file, for the replay
- *                         job atoms to consider the associated gpu job for forceful
- *                         failure and replay. May be zero
- * @force_replay_random:   Set to 1 to randomize the @force_replay_limit, in the
- *                         range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
  * @ctx_num:               Total number of contexts created for the device.
  * @io_history:            Pointer to an object keeping a track of all recent
  *                         register accesses. The history of register accesses
@@ -1373,8 +1458,8 @@
  *                         power on for GPU is started.
  * @infinite_cache_active_default: Set to enable using infinite cache for all the
  *                         allocations of a new context.
- * @mem_pool_max_size_default: Initial/default value for the maximum size of both
- *                         types of pool created for a new context.
+ * @mem_pool_defaults:     Default configuration for the group of memory pools
+ *                         created for a new context.
  * @current_gpu_coherency_mode: coherency mode in use, which can be different
  *                         from @system_coherency, when using protected mode.
  * @system_coherency:      coherency mode as retrieved from the device tree.
@@ -1391,6 +1476,13 @@
  * @protected_mode:        set to TRUE when GPU is put into protected mode
  * @protected_mode_transition: set to TRUE when GPU is transitioning into or
  *                         out of protected mode.
+ * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be
+ *                         enabled. Counters must be disabled before transition
+ *                         into protected mode.
+ * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not
+ *                         enabled.
+ * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
+ *                         counters, used if atomic disable is not possible.
  * @protected_mode_support: set to true if protected mode is supported.
  * @buslogger:              Pointer to the structure required for interfacing
  *                          with the bus logger module to set the size of buffer
@@ -1411,6 +1503,10 @@
  *                          on disabling of GWT.
  * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
  *                          Job Scheduler
+ * @l2_size_override:       Used to set L2 cache size via device tree blob
+ * @l2_hash_override:       Used to set L2 cache hash via device tree blob
+ * @policy_list:            A filtered list of policies available in the system.
+ * @policy_count:           Number of policies in the @policy_list.
  */
 struct kbase_device {
 	u32 hw_quirks_sc;
@@ -1430,13 +1526,17 @@
 		int flags;
 	} irqs[3];
 
-	struct clk *clock;
+	struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS];
+	unsigned int nr_clocks;
 #ifdef CONFIG_REGULATOR
-	int regulator_num;
-	struct regulator *regulator[KBASE_MAX_REGULATORS];
-	struct opp_table *dev_opp_table;
-#endif
+	struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS];
+	unsigned int nr_regulators;
+#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
+	struct opp_table *opp_table;
+#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) */
+#endif /* CONFIG_REGULATOR */
 	char devname[DEVNAME_SIZE];
+	u32  id;
 
 #ifdef CONFIG_MALI_NO_MALI
 	void *model;
@@ -1450,11 +1550,12 @@
 
 	struct kbase_pm_device_data pm;
 	struct kbasep_js_device_data js_data;
-	struct kbase_mem_pool mem_pool;
-	struct kbase_mem_pool lp_mem_pool;
+	struct kbase_mem_pool_group mem_pools;
 	struct kbasep_mem_device memdev;
 	struct kbase_mmu_mode const *mmu_mode;
 
+	struct memory_group_manager_device *mgm_dev;
+
 	struct kbase_as as[BASE_MAX_NR_AS];
 	u16 as_free; /* Bitpattern of free Address Spaces */
 	struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
@@ -1466,24 +1567,11 @@
 	unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
 	unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
 
-	u32 tiler_needed_cnt;
-	u32 shader_needed_cnt;
-
 	struct {
 		atomic_t count;
 		atomic_t state;
 	} disjoint_event;
 
-	u32 l2_users_count;
-
-	u64 shader_available_bitmap;
-	u64 tiler_available_bitmap;
-	u64 l2_available_bitmap;
-	u64 stack_available_bitmap;
-
-	u64 shader_ready_bitmap;
-	u64 shader_transitioning_bitmap;
-
 	s8 nr_hw_address_spaces;
 	s8 nr_user_address_spaces;
 
@@ -1493,12 +1581,19 @@
 
 		struct kbase_context *kctx;
 		u64 addr;
+		u64 addr_bytes;
 
 		struct kbase_instr_backend backend;
 	} hwcnt;
 
+	struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
+	struct kbase_hwcnt_context *hwcnt_gpu_ctx;
+	struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
 	struct kbase_vinstr_context *vinstr_ctx;
 
+	atomic_t               timeline_is_enabled;
+	struct kbase_timeline *timeline;
+
 #if KBASE_TRACE_ENABLE
 	spinlock_t              trace_lock;
 	u16                     trace_first_out;
@@ -1508,7 +1603,9 @@
 
 	u32 reset_timeout_ms;
 
-	struct mutex cacheclean_lock;
+	bool cache_clean_in_progress;
+	bool cache_clean_queued;
+	wait_queue_head_t cache_clean_wait;
 
 	void *platform_context;
 
@@ -1518,23 +1615,18 @@
 #ifdef CONFIG_MALI_DEVFREQ
 	struct devfreq_dev_profile devfreq_profile;
 	struct devfreq *devfreq;
-	unsigned long current_freq;
+	unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
 	unsigned long current_nominal_freq;
-	unsigned long current_voltage[KBASE_MAX_REGULATORS];
+	unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS];
 	u64 current_core_mask;
-	struct kbase_devfreq_opp *opp_table;
+	struct kbase_devfreq_opp *devfreq_table;
 	int num_opps;
 	struct kbasep_pm_metrics last_devfreq_metrics;
-	struct {
-		void (*voltage_range_check)(struct kbase_device *kbdev,
-					    unsigned long *voltages);
-#ifdef CONFIG_REGULATOR
-		int (*set_voltages)(struct kbase_device *kbdev,
-				    unsigned long *voltages, bool inc);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)
+	struct kbase_devfreq_queue_info devfreq_queue;
 #endif
-		int (*set_frequency)(struct kbase_device *kbdev,
-				     unsigned long freq);
-	} devfreq_ops;
+
 #ifdef CONFIG_DEVFREQ_THERMAL
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 	struct devfreq_cooling_device *devfreq_cooling;
@@ -1553,31 +1645,17 @@
 		 * the difference between last_metrics and the current values.
 		 */
 		struct kbasep_pm_metrics last_metrics;
-
-		/*
-		 * gpu_active_callback - Inform IPA that GPU is now active
-		 * @model_data: Pointer to model data
-		 */
-		void (*gpu_active_callback)(
-				struct kbase_ipa_model_vinstr_data *model_data);
-
-		/*
-		 * gpu_idle_callback - Inform IPA that GPU is now idle
-		 * @model_data: Pointer to model data
-		 */
-		void (*gpu_idle_callback)(
-				struct kbase_ipa_model_vinstr_data *model_data);
-
 		/* Model data to pass to ipa_gpu_active/idle() */
 		struct kbase_ipa_model_vinstr_data *model_data;
 
-		/* true if IPA is currently using vinstr */
-		bool vinstr_active;
+		/* true if use of fallback model has been forced by the User */
+		bool force_fallback_model;
 	} ipa;
 #endif /* CONFIG_DEVFREQ_THERMAL */
 #endif /* CONFIG_MALI_DEVFREQ */
+	unsigned long previous_frequency;
 
-	bool job_fault_debug;
+	atomic_t job_fault_debug;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *mali_debugfs_directory;
@@ -1600,16 +1678,6 @@
 #endif /* !MALI_CUSTOMER_RELEASE */
 #endif /* CONFIG_DEBUG_FS */
 
-	u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
-
-
-#if MALI_CUSTOMER_RELEASE == 0
-	int force_replay_limit;
-	int force_replay_count;
-	base_jd_core_req force_replay_core_req;
-	bool force_replay_random;
-#endif
-
 	atomic_t ctx_num;
 
 #ifdef CONFIG_DEBUG_FS
@@ -1628,7 +1696,7 @@
 #else
 	u32 infinite_cache_active_default;
 #endif
-	size_t mem_pool_max_size_default;
+	struct kbase_mem_pool_group_config mem_pool_defaults;
 
 	u32 current_gpu_coherency_mode;
 	u32 system_coherency;
@@ -1646,9 +1714,15 @@
 
 	bool protected_mode_transition;
 
+	bool protected_mode_hwcnt_desired;
+
+	bool protected_mode_hwcnt_disabled;
+
+	struct work_struct protected_mode_hwcnt_disable_work;
+
 	bool protected_mode_support;
 
-#ifdef CONFIG_MALI_FPGA_BUS_LOGGER
+#ifdef CONFIG_MALI_BUSLOG
 	struct bus_logger_client *buslogger;
 #endif
 
@@ -1663,13 +1737,19 @@
 	/* See KBASE_SERIALIZE_* for details */
 	u8 serialize_jobs;
 
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 	u8 backup_serialize_jobs;
 #endif
 
+	u8 l2_size_override;
+	u8 l2_hash_override;
+
 	/* See KBASE_JS_*_PRIORITY_MODE for details. */
 	u32 js_ctx_scheduling_mode;
 
+
+	const struct kbase_pm_policy *policy_list[KBASE_PM_MAX_NUM_POLICIES];
+	int policy_count;
 };
 
 /**
@@ -1693,6 +1773,56 @@
 					 ((0 & 0xFF) << 0))
 
 /**
+ * enum kbase_file_state - Initialization state of a file opened by @kbase_open
+ *
+ * @KBASE_FILE_NEED_VSN:        Initial state, awaiting API version.
+ * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in
+ *                              progress and other setup calls shall be
+ *                              rejected.
+ * @KBASE_FILE_NEED_CTX:        Indicates if the API version handshake has
+ *                              completed, awaiting context creation flags.
+ * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress
+ *                              and other setup calls shall be rejected.
+ * @KBASE_FILE_COMPLETE:        Indicates if the setup for context has
+ *                              completed, i.e. flags have been set for the
+ *                              context.
+ *
+ * The driver allows only limited interaction with user-space until setup
+ * is complete.
+ */
+enum kbase_file_state {
+	KBASE_FILE_NEED_VSN,
+	KBASE_FILE_VSN_IN_PROGRESS,
+	KBASE_FILE_NEED_CTX,
+	KBASE_FILE_CTX_IN_PROGRESS,
+	KBASE_FILE_COMPLETE
+};
+
+/**
+ * struct kbase_file - Object representing a file opened by @kbase_open
+ *
+ * @kbdev:               Object representing an instance of GPU platform device,
+ *                       allocated from the probe method of the Mali driver.
+ * @filp:                Pointer to the struct file corresponding to device file
+ *                       /dev/malixx instance, passed to the file's open method.
+ * @kctx:                Object representing an entity, among which GPU is
+ *                       scheduled and which gets its own GPU address space.
+ *                       Invalid until @setup_state is KBASE_FILE_COMPLETE.
+ * @api_version:         Contains the version number for User/kernel interface,
+ *                       used for compatibility check. Invalid until
+ *                       @setup_state is KBASE_FILE_NEED_CTX.
+ * @setup_state:         Initialization state of the file. Values come from
+ *                       the kbase_file_state enumeration.
+ */
+struct kbase_file {
+	struct kbase_device  *kbdev;
+	struct file          *filp;
+	struct kbase_context *kctx;
+	unsigned long         api_version;
+	atomic_t              setup_state;
+};
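
A minimal sketch of how @setup_state might gate the version-check step, assuming the transitions described by the enum above; the function and the version-packing scheme shown are illustrative, not the driver's actual code.

    static int example_file_set_api_version(struct kbase_file *const kfile,
    		u16 major, u16 minor)
    {
    	/* Only one caller may move NEED_VSN -> VSN_IN_PROGRESS */
    	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_NEED_VSN,
    			KBASE_FILE_VSN_IN_PROGRESS) != KBASE_FILE_NEED_VSN)
    		return -EPERM;

    	/* Packing of major/minor into api_version is illustrative only */
    	kfile->api_version = ((unsigned long)major << 16) | minor;

    	/* Handshake done: context creation flags are accepted from now on */
    	atomic_set(&kfile->setup_state, KBASE_FILE_NEED_CTX);
    	return 0;
    }
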
+
+/**
  * enum kbase_context_flags - Flags for kbase contexts
  *
  * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
@@ -1773,17 +1903,16 @@
 	DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
 };
 
-
 /**
- * struct kbase_context - Object representing an entity, among which GPU is
- *                        scheduled and gets its own GPU address space.
- *                        Created when the device file /dev/malixx is opened.
+ * struct kbase_context - Kernel base context
+ *
  * @filp:                 Pointer to the struct file corresponding to device file
  *                        /dev/malixx instance, passed to the file's open method.
  * @kbdev:                Pointer to the Kbase device for which the context is created.
+ * @kctx_list_link:       Node in the Kbase device's list of contexts.
  * @mmu:                  Structure holding details of the MMU tables for this
  *                        context
- * @id:                   Unique indentifier for the context, indicates the number of
+ * @id:                   Unique identifier for the context, indicates the number of
  *                        contexts which have been created for the device so far.
  * @api_version:          contains the version number for User/kernel interface,
  *                        used for compatibility check.
@@ -1798,18 +1927,11 @@
  *                        should stop posting events and also inform event handling
  *                        thread that context termination is in progress.
  * @event_workq:          Workqueue for processing work items corresponding to atoms
- *                        that do not return an event to Userspace or have to perform
- *                        a replay job
+ *                        that do not return an event to userspace.
  * @event_count:          Count of the posted events to be consumed by Userspace.
  * @event_coalesce_count: Count of the events present in @event_coalesce_list.
  * @flags:                bitmap of enums from kbase_context_flags, indicating the
  *                        state & attributes for the context.
- * @setup_complete:       Indicates if the setup for context has completed, i.e.
- *                        flags have been set for the context. Driver allows only
- *                        2 ioctls until the setup is done. Valid only for
- *                        @api_version value 0.
- * @setup_in_progress:    Indicates if the context's setup is in progress and other
- *                        setup calls during that shall be rejected.
  * @aliasing_sink_page:   Special page used for KBASE_MEM_TYPE_ALIAS allocations,
  *                        which can alias number of memory regions. The page is
  *                        represent a region where it is mapped with a write-alloc
@@ -1828,6 +1950,9 @@
  *                        having the same value for GPU & CPU virtual address.
  * @reg_rbtree_custom:    RB tree of the memory regions allocated from the CUSTOM_VA
  *                        zone of the GPU virtual address space.
+ * @reg_rbtree_exec:      RB tree of the memory regions allocated from the EXEC_VA
+ *                        zone of the GPU virtual address space. Used for GPU-executable
+ *                        allocations which don't need the SAME_VA property.
  * @cookies:              Bitmask containing of BITS_PER_LONG bits, used mainly for
  *                        SAME_VA allocations to defer the reservation of memory region
  *                        (from the GPU virtual address space) from base_mem_alloc
@@ -1842,10 +1967,16 @@
  * @event_queue:          Wait queue used for blocking the thread, which consumes
  *                        the base_jd_event corresponding to an atom, when there
  *                        are no more posted events.
- * @tgid:                 thread group id of the process, whose thread opened the
- *                        device file /dev/malixx instance to create a context.
- * @pid:                  id of the thread, corresponding to process @tgid, which
- *                        actually which opened the device file.
+ * @tgid:                 Thread group ID of the process whose thread created
+ *                        the context (by calling KBASE_IOCTL_VERSION_CHECK or
+ *                        KBASE_IOCTL_SET_FLAGS, depending on the @api_version).
+ *                        This is usually, but not necessarily, the same as the
+ *                        process whose thread opened the device file
+ *                        /dev/malixx instance.
+ * @pid:                  ID of the thread, corresponding to process @tgid,
+ *                        which actually created the context. This is usually,
+ *                        but not necessarily, the same as the thread which
+ *                        opened the device file /dev/malixx instance.
  * @jctx:                 object encapsulating all the Job dispatcher related state,
  *                        including the array of atoms.
  * @used_pages:           Keeps a track of the number of 4KB physical pages in use
@@ -1854,10 +1985,7 @@
  *                        when special tracking page is freed by userspace where it
  *                        is reset to 0.
  * @permanent_mapped_pages: Usage count of permanently mapped memory
- * @mem_pool:             Object containing the state for the context specific pool of
- *                        4KB size physical pages.
- * @lp_mem_pool:          Object containing the state for the context specific pool of
- *                        2MB size physical pages.
+ * @mem_pools:            Context-specific pools of free physical memory pages.
  * @reclaim:              Shrinker object registered with the kernel containing
  *                        the pointer to callback function which is invoked under
  *                        low memory conditions. In the callback function Driver
@@ -1900,8 +2028,10 @@
  *                        pages used for GPU allocations, done for the context,
  *                        to the memory consumed by the process.
  * @same_va_end:          End address of the SAME_VA zone (in 4KB page units)
- * @timeline:             Object tracking the number of atoms currently in flight for
- *                        the context and thread group id of the process, i.e. @tgid.
+ * @exec_va_start:        Start address of the EXEC_VA zone (in 4KB page units)
+ *                        or U64_MAX if the EXEC_VA zone is uninitialized.
+ * @gpu_va_end:           End address of the GPU va space (in 4KB page units)
+ * @jit_va:               Indicates if a JIT_VA zone has been created.
  * @mem_profile_data:     Buffer containing the profiling information provided by
  *                        Userspace, can be read through the mem_profile debugfs file.
  * @mem_profile_size:     Size of the @mem_profile_data.
@@ -1934,9 +2064,11 @@
  * @slots_pullable:       Bitmask of slots, indicating the slots for which the
  *                        context has pullable atoms in the runnable tree.
  * @work:                 Work structure used for deferred ASID assignment.
- * @vinstr_cli:           Pointer to the legacy userspace vinstr client, there can
- *                        be only such client per kbase context.
- * @vinstr_cli_lock:      Lock used for the vinstr ioctl calls made for @vinstr_cli.
+ * @legacy_hwcnt_cli:     Pointer to the legacy userspace hardware counters
+ *                        client; there can be only one such client per kbase
+ *                        context.
+ * @legacy_hwcnt_lock:    Lock used to prevent concurrent access to
+ *                        @legacy_hwcnt_cli.
  * @completed_jobs:       List containing completed atoms for which base_jd_event is
  *                        to be posted.
  * @work_count:           Number of work items, corresponding to atoms, currently
@@ -1953,6 +2085,9 @@
  *                        old or new version of interface for JIT allocations
  *	                  1 -> client used KBASE_IOCTL_MEM_JIT_INIT_OLD
  *	                  2 -> client used KBASE_IOCTL_MEM_JIT_INIT
+ * @jit_group_id:         A memory group ID to be passed to a platform-specific
+ *                        memory group manager.
+ *                        Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
  * @jit_active_head:      List containing the JIT allocations which are in use.
  * @jit_pool_head:        List containing the JIT allocations which have been
  *                        freed up by userpsace and so not being used by them.
@@ -1992,10 +2127,17 @@
  * @priority:             Indicates the context priority. Used along with @atoms_count
  *                        for context scheduling, protected by hwaccess_lock.
  * @atoms_count:          Number of gpu atoms currently in use, per priority
+ *
+ * A kernel base context is an entity among which the GPU is scheduled.
+ * Each context has its own GPU address space.
+ * Up to one context can be created for each client that opens the device file
+ * /dev/malixx. Context creation is deferred until a special ioctl() system call
+ * is made on the device file.
  */
 struct kbase_context {
 	struct file *filp;
 	struct kbase_device *kbdev;
+	struct list_head kctx_list_link;
 	struct kbase_mmu_table mmu;
 
 	u32 id;
@@ -2010,9 +2152,6 @@
 
 	atomic_t flags;
 
-	atomic_t                setup_complete;
-	atomic_t                setup_in_progress;
-
 	struct tagged_addr aliasing_sink_page;
 
 	spinlock_t              mem_partials_lock;
@@ -2021,6 +2160,7 @@
 	struct mutex            reg_lock;
 	struct rb_root reg_rbtree_same;
 	struct rb_root reg_rbtree_custom;
+	struct rb_root reg_rbtree_exec;
 
 
 	unsigned long    cookies;
@@ -2033,10 +2173,9 @@
 	struct kbase_jd_context jctx;
 	atomic_t used_pages;
 	atomic_t         nonmapped_pages;
-	unsigned long permanent_mapped_pages;
+	atomic_t permanent_mapped_pages;
 
-	struct kbase_mem_pool mem_pool;
-	struct kbase_mem_pool lp_mem_pool;
+	struct kbase_mem_pool_group mem_pools;
 
 	struct shrinker         reclaim;
 	struct list_head        evict_list;
@@ -2054,16 +2193,12 @@
 
 	atomic_t refcount;
 
-
-	/* NOTE:
-	 *
-	 * Flags are in jctx.sched_info.ctx.flags
-	 * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
-	 *
-	 * All other flags must be added there */
 	spinlock_t         mm_update_lock;
 	struct mm_struct __rcu *process_mm;
 	u64 same_va_end;
+	u64 exec_va_start;
+	u64 gpu_va_end;
+	bool jit_va;
 
 #ifdef CONFIG_DEBUG_FS
 	char *mem_profile_data;
@@ -2091,8 +2226,8 @@
 
 	struct work_struct work;
 
-	struct kbase_vinstr_client *vinstr_cli;
-	struct mutex vinstr_cli_lock;
+	struct kbase_hwcnt_legacy_client *legacy_hwcnt_cli;
+	struct mutex legacy_hwcnt_lock;
 
 	struct list_head completed_jobs;
 	atomic_t work_count;
@@ -2104,6 +2239,7 @@
 	u8 jit_current_allocations;
 	u8 jit_current_allocations_per_bin[256];
 	u8 jit_version;
+	u8 jit_group_id;
 	struct list_head jit_active_head;
 	struct list_head jit_pool_head;
 	struct list_head jit_destroy_head;
@@ -2121,7 +2257,7 @@
 
 	u8 trim_level;
 
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 	bool gwt_enabled;
 
 	bool gwt_was_enabled;
@@ -2135,7 +2271,7 @@
 	s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
 };
 
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 /**
  * struct kbasep_gwt_list_element - Structure used to collect GPU
  *                                  write faults.
@@ -2215,10 +2351,7 @@
 /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
 #define KBASE_CLEAN_CACHE_MAX_LOOPS     100000
 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
-#define KBASE_AS_INACTIVE_MAX_LOOPS     100000
-
-/* Maximum number of times a job can be replayed */
-#define BASEP_JD_REPLAY_LIMIT 15
+#define KBASE_AS_INACTIVE_MAX_LOOPS     100000000
 
 /* JobDescriptorHeader - taken from the architecture specifications, the layout
  * is currently identical for all GPU archs. */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
index 639092b..a265082 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_device.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,8 +39,6 @@
 #include <mali_kbase_hw.h>
 #include <mali_kbase_config_defaults.h>
 
-#include <mali_kbase_profiling_gator_api.h>
-
 /* NOTE: Magic - 0x45435254 (TRCE in ASCII).
  * Supports tracing feature provided in the base module.
  * Please keep it in sync with the value of base module.
@@ -70,20 +68,11 @@
 
 static int kbase_device_as_init(struct kbase_device *kbdev, int i)
 {
-	const char format[] = "mali_mmu%d";
-	char name[sizeof(format)];
-	const char poke_format[] = "mali_mmu%d_poker";
-	char poke_name[sizeof(poke_format)];
-
-	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
-		snprintf(poke_name, sizeof(poke_name), poke_format, i);
-
-	snprintf(name, sizeof(name), format, i);
-
 	kbdev->as[i].number = i;
-	kbdev->as[i].fault_addr = 0ULL;
+	kbdev->as[i].bf_data.addr = 0ULL;
+	kbdev->as[i].pf_data.addr = 0ULL;
 
-	kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+	kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i);
 	if (!kbdev->as[i].pf_wq)
 		return -EINVAL;
 
@@ -94,7 +83,8 @@
 		struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
 		struct work_struct *poke_work = &kbdev->as[i].poke_work;
 
-		kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+		kbdev->as[i].poke_wq =
+			alloc_workqueue("mali_mmu%d_poker", 0, 1, i);
 		if (!kbdev->as[i].poke_wq) {
 			destroy_workqueue(kbdev->as[i].pf_wq);
 			return -EINVAL;
@@ -148,7 +138,7 @@
 
 int kbase_device_init(struct kbase_device * const kbdev)
 {
-	int i, err;
+	int err;
 #ifdef CONFIG_ARM64
 	struct device_node *np = NULL;
 #endif /* CONFIG_ARM64 */
@@ -211,19 +201,6 @@
 	if (err)
 		goto dma_set_mask_failed;
 
-	if (!kbdev->dev->dma_parms) {
-		kbdev->dev->dma_parms = devm_kzalloc(kbdev->dev,
-				sizeof(*kbdev->dev->dma_parms), GFP_KERNEL);
-		if (!kbdev->dev->dma_parms) {
-			err = -ENOMEM;
-			goto dma_set_mask_failed;
-		}
-	}
-	err = dma_set_max_seg_size(kbdev->dev,
-			DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
-	if (err)
-		goto dma_set_mask_failed;
-
 	kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
 
 	err = kbase_device_all_as_init(kbdev);
@@ -236,11 +213,7 @@
 	if (err)
 		goto term_as;
 
-	mutex_init(&kbdev->cacheclean_lock);
-
-	/* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
-	for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
-		kbdev->kbase_profiling_controls[i] = 0;
+	init_waitqueue_head(&kbdev->cache_clean_wait);
 
 	kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
 
@@ -259,6 +232,9 @@
 	else
 		kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
 
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
 	return 0;
 term_trace:
 	kbasep_trace_term(kbdev);
@@ -274,6 +250,8 @@
 {
 	KBASE_DEBUG_ASSERT(kbdev);
 
+	WARN_ON(!list_empty(&kbdev->kctx_list));
+
 #if KBASE_TRACE_ENABLE
 	kbase_debug_assert_register_hook(NULL, NULL);
 #endif
@@ -515,6 +493,7 @@
 }
 
 static const struct file_operations kbasep_trace_debugfs_fops = {
+	.owner = THIS_MODULE,
 	.open = kbasep_trace_debugfs_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -556,39 +535,3 @@
 	CSTD_UNUSED(kbdev);
 }
 #endif				/* KBASE_TRACE_ENABLE  */
-
-void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
-{
-	switch (control) {
-	case FBDUMP_CONTROL_ENABLE:
-		/* fall through */
-	case FBDUMP_CONTROL_RATE:
-		/* fall through */
-	case SW_COUNTER_ENABLE:
-		/* fall through */
-	case FBDUMP_CONTROL_RESIZE_FACTOR:
-		kbdev->kbase_profiling_controls[control] = value;
-		break;
-	default:
-		dev_err(kbdev->dev, "Profiling control %d not found\n", control);
-		break;
-	}
-}
-
-/*
- * Called by gator to control the production of
- * profiling information at runtime
- * */
-
-void _mali_profiling_control(u32 action, u32 value)
-{
-	struct kbase_device *kbdev = NULL;
-
-	/* find the first i.e. call with -1 */
-	kbdev = kbase_find_device(-1);
-
-	if (NULL != kbdev)
-		kbase_set_profiling_control(kbdev, action, value);
-}
-KBASE_EXPORT_SYMBOL(_mali_profiling_control);
-
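
The workqueue rename above relies on alloc_workqueue() accepting a printf-style format string plus arguments, which is why the local snprintf() buffers could be dropped. A minimal sketch of the pattern, with make_as_wq() as a hypothetical helper:

	#include <linux/workqueue.h>

	/* Name a per-address-space workqueue directly through the
	 * printf-style interface of alloc_workqueue(); the format
	 * arguments are expanded by the workqueue core itself.
	 */
	static struct workqueue_struct *make_as_wq(int as_nr)
	{
		return alloc_workqueue("mali_mmu%d", 0, 1, as_nr);
	}
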
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
index 3c9cef3..721af69 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_event.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2016,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016,2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,11 +24,12 @@
 
 #include <mali_kbase.h>
 #include <mali_kbase_debug.h>
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 
 static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
 	struct base_jd_udata data;
+	struct kbase_device *kbdev;
 
 	lockdep_assert_held(&kctx->jctx.lock);
 
@@ -36,10 +37,11 @@
 	KBASE_DEBUG_ASSERT(katom != NULL);
 	KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
 
+	kbdev = kctx->kbdev;
 	data = katom->udata;
 
-	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(katom, kctx);
-	KBASE_TLSTREAM_TL_DEL_ATOM(katom);
+	KBASE_TLSTREAM_TL_NRET_ATOM_CTX(kbdev, katom, kctx);
+	KBASE_TLSTREAM_TL_DEL_ATOM(kbdev, katom);
 
 	katom->status = KBASE_JD_ATOM_STATE_UNUSED;
 
@@ -170,6 +172,8 @@
 
 void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
 {
+	struct kbase_device *kbdev = ctx->kbdev;
+
 	if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
 		if (atom->event_code == BASE_JD_EVENT_DONE) {
 			/* Don't report the event */
@@ -183,7 +187,7 @@
 		kbase_event_process_noreport(ctx, atom);
 		return;
 	}
-	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_POSTED);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, atom, TL_ATOM_STATE_POSTED);
 	if (atom->core_req & BASE_JD_REQ_EVENT_COALESCE) {
 		/* Don't report the event until other event(s) have completed */
 		mutex_lock(&ctx->event_mutex);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_fence.c b/drivers/gpu/arm/midgard/mali_kbase_fence.c
index c75be1a..96a6ab9 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_fence.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_fence.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -67,7 +67,11 @@
 kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size)
 #endif
 {
+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+	snprintf(str, size, "%u", fence->seqno);
+#else
 	snprintf(str, size, "%llu", fence->seqno);
+#endif
 }
 
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
@@ -177,6 +181,7 @@
 	kbase_fence_cb->fence = fence;
 	kbase_fence_cb->katom = katom;
 	INIT_LIST_HEAD(&kbase_fence_cb->node);
+	atomic_inc(&katom->dma_fence.dep_count);
 
 	err = dma_fence_add_callback(fence, &kbase_fence_cb->fence_cb,
 				     callback);
@@ -189,15 +194,16 @@
 			err = 0;
 
 		kfree(kbase_fence_cb);
+		atomic_dec(&katom->dma_fence.dep_count);
 	} else if (err) {
 		kfree(kbase_fence_cb);
+		atomic_dec(&katom->dma_fence.dep_count);
 	} else {
 		/*
 		 * Get reference to fence that will be kept until callback gets
 		 * cleaned up in kbase_fence_free_callbacks().
 		 */
 		dma_fence_get(fence);
-		atomic_inc(&katom->dma_fence.dep_count);
 		/* Add callback to katom's list of callbacks */
 		list_add(&kbase_fence_cb->node, &katom->dma_fence.callbacks);
 	}
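
The reshuffled dep_count handling above follows a common pattern: raise the count before registering the callback, then roll it back on every failure path, so a callback that fires immediately never observes a count that has not yet been incremented. A hedged sketch of the general shape, with register_dep() and add_cb as hypothetical names:

	#include <linux/atomic.h>

	/* Count-up-front, roll-back-on-error pattern: the dependency count
	 * is visible before the callback can possibly run.
	 */
	static int register_dep(atomic_t *dep_count, int (*add_cb)(void))
	{
		int err;

		atomic_inc(dep_count);		/* raise before registration */
		err = add_cb();			/* may fire the callback at once */
		if (err)
			atomic_dec(dep_count);	/* undo on any failure path */
		return err;
	}
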
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h
index 4f54817..6428f08 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gator.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,23 +27,26 @@
  * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
  * they are unknown inside gator.
  */
+
 #ifndef _KBASE_GATOR_H_
 #define _KBASE_GATOR_H_
 
-#ifdef CONFIG_MALI_GATOR_SUPPORT
-#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#include <linux/types.h>
+
 #define GATOR_JOB_SLOT_START 1
 #define GATOR_JOB_SLOT_STOP  2
 #define GATOR_JOB_SLOT_SOFT_STOPPED  3
 
-void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
-void kbase_trace_mali_pm_status(u32 event, u64 value);
-void kbase_trace_mali_pm_power_off(u32 event, u64 value);
-void kbase_trace_mali_pm_power_on(u32 event, u64 value);
-void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
-void kbase_trace_mali_mmu_as_in_use(int event);
-void kbase_trace_mali_mmu_as_released(int event);
-void kbase_trace_mali_total_alloc_pages_change(long long int event);
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+
+struct kbase_context;
+
+void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value);
+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event);
 
 #endif /* CONFIG_MALI_GATOR_SUPPORT */
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
index 218e63a..a38e886 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_id.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,22 +32,6 @@
 #define GPU_ID_VERSION_MAJOR              (0xFu  << GPU_ID_VERSION_MAJOR_SHIFT)
 #define GPU_ID_VERSION_PRODUCT_ID  (0xFFFFu << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
 
-/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
-#define GPU_ID_PI_T60X                    0x6956u
-#define GPU_ID_PI_T62X                    0x0620u
-#define GPU_ID_PI_T76X                    0x0750u
-#define GPU_ID_PI_T72X                    0x0720u
-#define GPU_ID_PI_TFRX                    0x0880u
-#define GPU_ID_PI_T86X                    0x0860u
-#define GPU_ID_PI_T82X                    0x0820u
-#define GPU_ID_PI_T83X                    0x0830u
-
-/* New GPU ID format when PRODUCT_ID is >= 0x1000 (and not 0x6956) */
-#define GPU_ID_PI_NEW_FORMAT_START        0x1000
-#define GPU_ID_IS_NEW_FORMAT(product_id)  ((product_id) != GPU_ID_PI_T60X && \
-						(product_id) >= \
-						GPU_ID_PI_NEW_FORMAT_START)
-
 #define GPU_ID2_VERSION_STATUS_SHIFT      0
 #define GPU_ID2_VERSION_MINOR_SHIFT       4
 #define GPU_ID2_VERSION_MAJOR_SHIFT       12
@@ -109,13 +93,16 @@
 #define GPU_ID2_PRODUCT_TDVX              GPU_ID2_MODEL_MAKE(7, 3)
 #define GPU_ID2_PRODUCT_TNOX              GPU_ID2_MODEL_MAKE(7, 1)
 #define GPU_ID2_PRODUCT_TGOX              GPU_ID2_MODEL_MAKE(7, 2)
-#define GPU_ID2_PRODUCT_TKAX              GPU_ID2_MODEL_MAKE(8, 0)
 #define GPU_ID2_PRODUCT_TTRX              GPU_ID2_MODEL_MAKE(9, 0)
-#define GPU_ID2_PRODUCT_TBOX              GPU_ID2_MODEL_MAKE(8, 2)
-
-/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
-#define GPU_ID_S_15DEV0                   0x1
-#define GPU_ID_S_EAC                      0x2
+#define GPU_ID2_PRODUCT_TNAX              GPU_ID2_MODEL_MAKE(9, 1)
+#define GPU_ID2_PRODUCT_TBEX              GPU_ID2_MODEL_MAKE(9, 2)
+#define GPU_ID2_PRODUCT_LBEX              GPU_ID2_MODEL_MAKE(9, 4)
+#define GPU_ID2_PRODUCT_TULX              GPU_ID2_MODEL_MAKE(10, 0)
+#define GPU_ID2_PRODUCT_TDUX              GPU_ID2_MODEL_MAKE(10, 1)
+#define GPU_ID2_PRODUCT_TODX              GPU_ID2_MODEL_MAKE(10, 2)
+#define GPU_ID2_PRODUCT_TIDX              GPU_ID2_MODEL_MAKE(10, 3)
+#define GPU_ID2_PRODUCT_TVAX              GPU_ID2_MODEL_MAKE(10, 4)
+#define GPU_ID2_PRODUCT_LODX              GPU_ID2_MODEL_MAKE(10, 5)
 
 /* Helper macro to create a GPU_ID assuming valid values for id, major,
    minor, status */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
index 514b065..2c42f5c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,7 +43,7 @@
 	kbdev_list = kbase_dev_list_get();
 	list_for_each(entry, kbdev_list) {
 		struct kbase_device *kbdev = NULL;
-		struct kbasep_kctx_list_element *element;
+		struct kbase_context *kctx;
 
 		kbdev = list_entry(entry, struct kbase_device, entry);
 		/* output the total memory usage and cap for this device */
@@ -51,13 +51,13 @@
 				kbdev->devname,
 				atomic_read(&(kbdev->memdev.used_pages)));
 		mutex_lock(&kbdev->kctx_list_lock);
-		list_for_each_entry(element, &kbdev->kctx_list, link) {
+		list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
 			/* output the memory usage and cap for each kctx
 			* opened on this device */
 			seq_printf(sfile, "  %s-0x%p %10u\n",
 				"kctx",
-				element->kctx,
-				atomic_read(&(element->kctx->used_pages)));
+				kctx,
+				atomic_read(&(kctx->used_pages)));
 		}
 		mutex_unlock(&kbdev->kctx_list_lock);
 	}
@@ -74,6 +74,7 @@
 }
 
 static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+	.owner = THIS_MODULE,
 	.open = kbasep_gpu_memory_debugfs_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
index 62ba105..f6b70bd 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,9 @@
 #include <mali_kbase_hwaccess_gpuprops.h>
 #include "mali_kbase_ioctl.h"
 #include <linux/clk.h>
+#include <mali_kbase_pm_internal.h>
+#include <linux/of_platform.h>
+#include <linux/moduleparam.h>
 
 /**
  * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
@@ -191,11 +194,19 @@
 static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
 {
 	int i;
+	u32 gpu_id;
+	u32 product_id;
 
 	/* Populate the base_gpu_props structure */
 	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
 	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE
 	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+#else
+	gpu_props->core_props.gpu_available_memory_size =
+		totalram_pages() << PAGE_SHIFT;
+#endif
+
 	gpu_props->core_props.num_exec_engines =
 		KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
 
@@ -236,10 +247,38 @@
 		gpu_props->thread_props.tls_alloc =
 				gpu_props->raw_props.thread_tls_alloc;
 
-	gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
-	gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
-	gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
-	gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+	/* Workaround for GPU2019HW-509. MIDHARC-2364 was wrongly applied
+	 * to tDUx GPUs.
+	 */
+	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
+	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+	if ((gpu_id & GPU_ID2_PRODUCT_MODEL) == GPU_ID2_PRODUCT_TDUX) {
+		gpu_props->thread_props.max_registers =
+			KBASE_UBFX32(gpu_props->raw_props.thread_features,
+				     0U, 22);
+		gpu_props->thread_props.impl_tech =
+			KBASE_UBFX32(gpu_props->raw_props.thread_features,
+				     22U, 2);
+		gpu_props->thread_props.max_task_queue =
+			KBASE_UBFX32(gpu_props->raw_props.thread_features,
+				     24U, 8);
+		gpu_props->thread_props.max_thread_group_split = 0;
+	} else {
+		gpu_props->thread_props.max_registers =
+			KBASE_UBFX32(gpu_props->raw_props.thread_features,
+				     0U, 16);
+		gpu_props->thread_props.max_task_queue =
+			KBASE_UBFX32(gpu_props->raw_props.thread_features,
+				     16U, 8);
+		gpu_props->thread_props.max_thread_group_split =
+			KBASE_UBFX32(gpu_props->raw_props.thread_features,
+				     24U, 6);
+		gpu_props->thread_props.impl_tech =
+			KBASE_UBFX32(gpu_props->raw_props.thread_features,
+				     30U, 2);
+	}
 
 	/* If values are not specified, then use defaults */
 	if (gpu_props->thread_props.max_registers == 0) {
@@ -303,6 +342,87 @@
 		gpu_props->thread_props.max_thread_group_split = 0;
 }
 
+/*
+ * Module parameters to allow the L2 size and hash configuration to be
+ * overridden.
+ *
+ * These parameters must be set on insmod to take effect, and are not visible
+ * in sysfs.
+ */
+static u8 override_l2_size;
+module_param(override_l2_size, byte, 0);
+MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing");
+
+static u8 override_l2_hash;
+module_param(override_l2_hash, byte, 0);
+MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");
+
+/**
+ * kbase_read_l2_config_from_dt - Read L2 configuration
+ * @kbdev: The kbase device for which to get the L2 configuration.
+ *
+ * Check for L2 configuration overrides in module parameters and device tree.
+ * Override values in module parameters take priority over override values in
+ * device tree.
+ *
+ * Return: true if either size or hash was overridden, false if no overrides
+ * were found.
+ */
+static bool kbase_read_l2_config_from_dt(struct kbase_device * const kbdev)
+{
+	struct device_node *np = kbdev->dev->of_node;
+
+	if (!np)
+		return false;
+
+	if (override_l2_size)
+		kbdev->l2_size_override = override_l2_size;
+	else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override))
+		kbdev->l2_size_override = 0;
+
+	if (override_l2_hash)
+		kbdev->l2_hash_override = override_l2_hash;
+	else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override))
+		kbdev->l2_hash_override = 0;
+
+	if (kbdev->l2_size_override || kbdev->l2_hash_override)
+		return true;
+
+	return false;
+}
+
+void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
+{
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
+		struct kbase_gpuprops_regdump regdump;
+		base_gpu_props *gpu_props = &kbdev->gpu_props.props;
+
+		/* Check for L2 cache size & hash overrides */
+		if (!kbase_read_l2_config_from_dt(kbdev))
+			return;
+
+		/* Need L2 to get powered to reflect to L2_FEATURES */
+		kbase_pm_context_active(kbdev);
+
+		/* Wait for the completion of L2 power transition */
+		kbase_pm_wait_for_l2_powered(kbdev);
+
+		/* Dump L2_FEATURES register */
+		kbase_backend_gpuprops_get_l2_features(kbdev, &regdump);
+
+		dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
+				regdump.l2_features);
+
+		/* Update gpuprops with reflected L2_FEATURES */
+		gpu_props->raw_props.l2_features = regdump.l2_features;
+		gpu_props->l2_props.log2_cache_size =
+			KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+
+		/* Let GPU idle */
+		kbase_pm_context_idle(kbdev);
+	}
+}
+
 static struct {
 	u32 type;
 	size_t offset;
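
The thread_features repacking above only changes how the 32-bit register is sliced for tDUx. A self-contained sketch of both decodes, where ubfx32() mirrors the assumed semantics of KBASE_UBFX32 (extract size bits starting at bit offset) and the sample register value is arbitrary:

	#include <stdio.h>

	static unsigned int ubfx32(unsigned int value, unsigned int offset,
				   unsigned int size)
	{
		return (value >> offset) & ((1u << size) - 1u);
	}

	int main(void)
	{
		unsigned int tf = 0x12345678u; /* arbitrary THREAD_FEATURES value */

		/* tDUx layout: [21:0] max_registers, [23:22] impl_tech,
		 * [31:24] max_task_queue, no thread-group-split field.
		 */
		printf("tDUx:  regs=%u impl=%u queue=%u\n",
		       ubfx32(tf, 0, 22), ubfx32(tf, 22, 2), ubfx32(tf, 24, 8));

		/* Default layout: [15:0] max_registers, [23:16] max_task_queue,
		 * [29:24] max_thread_group_split, [31:30] impl_tech.
		 */
		printf("other: regs=%u queue=%u split=%u impl=%u\n",
		       ubfx32(tf, 0, 16), ubfx32(tf, 16, 8),
		       ubfx32(tf, 24, 6), ubfx32(tf, 30, 2));
		return 0;
	}
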
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
index 37d9c08..8edba48 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2015,2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015,2017,2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -55,6 +55,14 @@
 void kbase_gpuprops_set_features(struct kbase_device *kbdev);
 
 /**
+ * kbase_gpuprops_update_l2_features - Update GPU property of L2_FEATURES
+ * @kbdev:   Device pointer
+ *
+ * This function updates l2_features and the log2 cache size.
+ */
+void kbase_gpuprops_update_l2_features(struct kbase_device *kbdev);
+
+/**
  * kbase_gpuprops_populate_user_buffer - Populate the GPU properties buffer
  * @kbdev: The kbase device
  *
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gwt.c b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
index 2d1263d..75a0820 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_gwt.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_gwt.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -40,7 +40,8 @@
 			err = kbase_mmu_update_pages(kctx, reg->start_pfn,
 					kbase_get_gpu_phy_pages(reg),
 					reg->gpu_alloc->nents,
-					reg->flags & flag);
+					reg->flags & flag,
+					reg->gpu_alloc->group_id);
 			if (err)
 				dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages failure\n");
 		}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c
index f34f53a..c277c0c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hw.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,78 +36,74 @@
 {
 	const enum base_hw_feature *features;
 	u32 gpu_id;
-	u32 product_id;
 
 	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
-	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 
-	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
-		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
-		case GPU_ID2_PRODUCT_TMIX:
-			features = base_hw_features_tMIx;
-			break;
-		case GPU_ID2_PRODUCT_THEX:
-			features = base_hw_features_tHEx;
-			break;
-		case GPU_ID2_PRODUCT_TSIX:
-			features = base_hw_features_tSIx;
-			break;
-		case GPU_ID2_PRODUCT_TDVX:
-			features = base_hw_features_tDVx;
-			break;
-		case GPU_ID2_PRODUCT_TNOX:
-			features = base_hw_features_tNOx;
-			break;
-		case GPU_ID2_PRODUCT_TGOX:
-			features = base_hw_features_tGOx;
-			break;
-		case GPU_ID2_PRODUCT_TKAX:
-			features = base_hw_features_tKAx;
-			break;
-		case GPU_ID2_PRODUCT_TTRX:
-			features = base_hw_features_tTRx;
-			break;
-		case GPU_ID2_PRODUCT_TBOX:
-			features = base_hw_features_tBOx;
-			break;
-		default:
-			features = base_hw_features_generic;
-			break;
-		}
-	} else {
-		switch (product_id) {
-		case GPU_ID_PI_TFRX:
-			/* FALLTHROUGH */
-		case GPU_ID_PI_T86X:
-			features = base_hw_features_tFxx;
-			break;
-		case GPU_ID_PI_T83X:
-			features = base_hw_features_t83x;
-			break;
-		case GPU_ID_PI_T82X:
-			features = base_hw_features_t82x;
-			break;
-		case GPU_ID_PI_T76X:
-			features = base_hw_features_t76x;
-			break;
-		case GPU_ID_PI_T72X:
-			features = base_hw_features_t72x;
-			break;
-		case GPU_ID_PI_T62X:
-			features = base_hw_features_t62x;
-			break;
-		case GPU_ID_PI_T60X:
-			features = base_hw_features_t60x;
-			break;
-		default:
-			features = base_hw_features_generic;
-			break;
-		}
+	switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+	case GPU_ID2_PRODUCT_TMIX:
+		features = base_hw_features_tMIx;
+		break;
+	case GPU_ID2_PRODUCT_THEX:
+		features = base_hw_features_tHEx;
+		break;
+	case GPU_ID2_PRODUCT_TSIX:
+		features = base_hw_features_tSIx;
+		break;
+	case GPU_ID2_PRODUCT_TDVX:
+		features = base_hw_features_tDVx;
+		break;
+	case GPU_ID2_PRODUCT_TNOX:
+		features = base_hw_features_tNOx;
+		break;
+	case GPU_ID2_PRODUCT_TGOX:
+		features = base_hw_features_tGOx;
+		break;
+	case GPU_ID2_PRODUCT_TTRX:
+		features = base_hw_features_tTRx;
+		break;
+	case GPU_ID2_PRODUCT_TNAX:
+		features = base_hw_features_tNAx;
+		break;
+	case GPU_ID2_PRODUCT_LBEX:
+	case GPU_ID2_PRODUCT_TBEX:
+		features = base_hw_features_tBEx;
+		break;
+	case GPU_ID2_PRODUCT_TULX:
+		features = base_hw_features_tULx;
+		break;
+	case GPU_ID2_PRODUCT_TDUX:
+		features = base_hw_features_tDUx;
+		break;
+	case GPU_ID2_PRODUCT_TODX:
+	case GPU_ID2_PRODUCT_LODX:
+		features = base_hw_features_tODx;
+		break;
+	case GPU_ID2_PRODUCT_TIDX:
+		features = base_hw_features_tIDx;
+		break;
+	case GPU_ID2_PRODUCT_TVAX:
+		features = base_hw_features_tVAx;
+		break;
+	default:
+		features = base_hw_features_generic;
+		break;
 	}
 
 	for (; *features != BASE_HW_FEATURE_END; features++)
 		set_bit(*features, &kbdev->hw_features_mask[0]);
+
+#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP)
+	/* When dumping is enabled, the flush reduction optimization needs to
+	 * be disabled for GPUs on which it is safe to perform only a cache
+	 * clean operation at the end of a job chain. This is required to make
+	 * job dumping work: there is some discrepancy in the implementation of
+	 * the flush reduction optimization due to an unclear or ambiguous
+	 * architecture spec.
+	 */
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_CLEAN_ONLY_SAFE))
+		clear_bit(BASE_HW_FEATURE_FLUSH_REDUCTION,
+			&kbdev->hw_features_mask[0]);
+#endif
 }
 
 /**
@@ -117,9 +113,6 @@
  * Return: pointer to an array of hardware issues, terminated by
  * BASE_HW_ISSUE_END.
  *
- * This function can only be used on new-format GPU IDs, i.e. those for which
- * GPU_ID_IS_NEW_FORMAT evaluates as true. The GPU ID is read from the @kbdev.
- *
  * In debugging versions of the driver, unknown versions of a known GPU will
  * be treated as the most recent known version not later than the actual
  * version. In such circumstances, the GPU ID in @kbdev will also be replaced
@@ -179,16 +172,54 @@
 		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tGOx_r1p0},
 		  {U32_MAX, NULL} } },
 
-		{GPU_ID2_PRODUCT_TKAX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tKAx_r0p0},
-		  {U32_MAX, NULL} } },
-
 		{GPU_ID2_PRODUCT_TTRX,
 		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTRx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tTRx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTRx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tTRx_r0p1},
 		  {U32_MAX, NULL} } },
 
-		{GPU_ID2_PRODUCT_TBOX,
-		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBOx_r0p0},
+		{GPU_ID2_PRODUCT_TNAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 4), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 5), base_hw_issues_tNAx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tNAx_r0p1},
+		  {GPU_ID2_VERSION_MAKE(0, 1, 1), base_hw_issues_tNAx_r0p1},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_LBEX,
+		 {{GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TBEX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tBEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(0, 0, 3), base_hw_issues_tBEx_r0p0},
+		  {GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tBEx_r1p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TULX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tULx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TDUX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tDUx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TODX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_LODX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tODx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TIDX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tIDx_r0p0},
+		  {U32_MAX, NULL} } },
+
+		{GPU_ID2_PRODUCT_TVAX,
+		 {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0},
 		  {U32_MAX, NULL} } },
 	};
 
@@ -208,10 +239,8 @@
 	if (product != NULL) {
 		/* Found a matching product. */
 		const u32 version = gpu_id & GPU_ID2_VERSION;
-#if !MALI_CUSTOMER_RELEASE
 		u32 fallback_version = 0;
 		const enum base_hw_issue *fallback_issues = NULL;
-#endif
 		size_t v;
 
 		/* Stop when we reach the end of the map. */
@@ -223,25 +252,34 @@
 				break;
 			}
 
-#if !MALI_CUSTOMER_RELEASE
 			/* Check whether this is a candidate for most recent
 				known version not later than the actual
 				version. */
 			if ((version > product->map[v].version) &&
 				(product->map[v].version >= fallback_version)) {
-				fallback_version = product->map[v].version;
-				fallback_issues = product->map[v].issues;
-			}
+#if MALI_CUSTOMER_RELEASE
+				/* Match on version's major and minor fields */
+				if (((version ^ product->map[v].version) >>
+					GPU_ID2_VERSION_MINOR_SHIFT) == 0)
 #endif
+				{
+					fallback_version = product->map[v].version;
+					fallback_issues = product->map[v].issues;
+				}
+			}
 		}
 
-#if !MALI_CUSTOMER_RELEASE
 		if ((issues == NULL) && (fallback_issues != NULL)) {
 			/* Fall back to the issue set of the most recent known
 				version not later than the actual version. */
 			issues = fallback_issues;
 
+#if MALI_CUSTOMER_RELEASE
+			dev_warn(kbdev->dev,
+				"GPU hardware issue table may need updating:\n"
+#else
 			dev_info(kbdev->dev,
+#endif
 				"r%dp%d status %d is unknown; treating as r%dp%d status %d",
 				(gpu_id & GPU_ID2_VERSION_MAJOR) >>
 					GPU_ID2_VERSION_MAJOR_SHIFT,
@@ -263,7 +301,6 @@
 			kbase_gpuprops_update_core_props_gpu_id(
 				&kbdev->gpu_props.props);
 		}
-#endif
 	}
 	return issues;
 }
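
In customer-release builds the fallback above is now restricted to entries that differ from the actual GPU version only in the status field, which the code tests by XOR-ing the two version words and shifting away everything below the minor field. A small sketch of that check (the shift value 4 is GPU_ID2_VERSION_MINOR_SHIFT from mali_kbase_gpu_id.h):

	/* Accept a candidate issue table only when major and minor match;
	 * only the status bits may differ.
	 */
	int same_major_minor(unsigned int version, unsigned int candidate)
	{
		return ((version ^ candidate) >> 4) == 0;
	}
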
@@ -272,224 +309,94 @@
 {
 	const enum base_hw_issue *issues;
 	u32 gpu_id;
-	u32 product_id;
 	u32 impl_tech;
 
 	gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
-	product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
-	product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
 	impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
 
 	if (impl_tech != IMPLEMENTATION_MODEL) {
-		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
-			issues = kbase_hw_get_issues_for_new_id(kbdev);
-			if (issues == NULL) {
-				dev_err(kbdev->dev,
-					"Unknown GPU ID %x", gpu_id);
-				return -EINVAL;
-			}
+		issues = kbase_hw_get_issues_for_new_id(kbdev);
+		if (issues == NULL) {
+			dev_err(kbdev->dev,
+				"Unknown GPU ID %x", gpu_id);
+			return -EINVAL;
+		}
 
 #if !MALI_CUSTOMER_RELEASE
-			/* The GPU ID might have been replaced with the last
-			   known version of the same GPU. */
-			gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+		/* The GPU ID might have been replaced with the last
+			known version of the same GPU. */
+		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
 #endif
-
-		} else {
-			switch (gpu_id) {
-			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
-				issues = base_hw_issues_t60x_r0p0_15dev0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
-				issues = base_hw_issues_t60x_r0p0_eac;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
-				issues = base_hw_issues_t60x_r0p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
-				issues = base_hw_issues_t62x_r0p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
-			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
-				issues = base_hw_issues_t62x_r1p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
-				issues = base_hw_issues_t62x_r1p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
-				issues = base_hw_issues_t76x_r0p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
-				issues = base_hw_issues_t76x_r0p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
-				issues = base_hw_issues_t76x_r0p1_50rel0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
-				issues = base_hw_issues_t76x_r0p2;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
-				issues = base_hw_issues_t76x_r0p3;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
-				issues = base_hw_issues_t76x_r1p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
-			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
-			case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
-				issues = base_hw_issues_t72x_r0p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
-				issues = base_hw_issues_t72x_r1p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
-				issues = base_hw_issues_t72x_r1p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
-				issues = base_hw_issues_tFRx_r0p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
-				issues = base_hw_issues_tFRx_r0p2;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
-			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 8):
-				issues = base_hw_issues_tFRx_r1p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
-				issues = base_hw_issues_tFRx_r2p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
-				issues = base_hw_issues_t86x_r0p2;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
-			case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 8):
-				issues = base_hw_issues_t86x_r1p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
-				issues = base_hw_issues_t86x_r2p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
-				issues = base_hw_issues_t83x_r0p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
-			case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 8):
-				issues = base_hw_issues_t83x_r1p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
-				issues = base_hw_issues_t82x_r0p0;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
-				issues = base_hw_issues_t82x_r0p1;
-				break;
-			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
-			case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 8):
-				issues = base_hw_issues_t82x_r1p0;
-				break;
-			default:
-				dev_err(kbdev->dev,
-					"Unknown GPU ID %x", gpu_id);
-				return -EINVAL;
-			}
-		}
 	} else {
 		/* Software model */
-		if (GPU_ID_IS_NEW_FORMAT(product_id)) {
-			switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
-			case GPU_ID2_PRODUCT_TMIX:
-				issues = base_hw_issues_model_tMIx;
-				break;
-			case GPU_ID2_PRODUCT_THEX:
-				issues = base_hw_issues_model_tHEx;
-				break;
-			case GPU_ID2_PRODUCT_TSIX:
-				issues = base_hw_issues_model_tSIx;
-				break;
-			case GPU_ID2_PRODUCT_TDVX:
-				issues = base_hw_issues_model_tDVx;
-				break;
-			case GPU_ID2_PRODUCT_TNOX:
-				issues = base_hw_issues_model_tNOx;
-				break;
-			case GPU_ID2_PRODUCT_TGOX:
-				issues = base_hw_issues_model_tGOx;
-				break;
-			case GPU_ID2_PRODUCT_TKAX:
-				issues = base_hw_issues_model_tKAx;
-				break;
-			case GPU_ID2_PRODUCT_TTRX:
-				issues = base_hw_issues_model_tTRx;
-				break;
-			case GPU_ID2_PRODUCT_TBOX:
-				issues = base_hw_issues_model_tBOx;
-				break;
-			default:
-				dev_err(kbdev->dev,
-					"Unknown GPU ID %x", gpu_id);
-				return -EINVAL;
-			}
-		} else {
-			switch (product_id) {
-			case GPU_ID_PI_T60X:
-				issues = base_hw_issues_model_t60x;
-				break;
-			case GPU_ID_PI_T62X:
-				issues = base_hw_issues_model_t62x;
-				break;
-			case GPU_ID_PI_T72X:
-				issues = base_hw_issues_model_t72x;
-				break;
-			case GPU_ID_PI_T76X:
-				issues = base_hw_issues_model_t76x;
-				break;
-			case GPU_ID_PI_TFRX:
-				issues = base_hw_issues_model_tFRx;
-				break;
-			case GPU_ID_PI_T86X:
-				issues = base_hw_issues_model_t86x;
-				break;
-			case GPU_ID_PI_T83X:
-				issues = base_hw_issues_model_t83x;
-				break;
-			case GPU_ID_PI_T82X:
-				issues = base_hw_issues_model_t82x;
-				break;
-			default:
-				dev_err(kbdev->dev, "Unknown GPU ID %x",
-					gpu_id);
-				return -EINVAL;
-			}
+		switch (gpu_id & GPU_ID2_PRODUCT_MODEL) {
+		case GPU_ID2_PRODUCT_TMIX:
+			issues = base_hw_issues_model_tMIx;
+			break;
+		case GPU_ID2_PRODUCT_THEX:
+			issues = base_hw_issues_model_tHEx;
+			break;
+		case GPU_ID2_PRODUCT_TSIX:
+			issues = base_hw_issues_model_tSIx;
+			break;
+		case GPU_ID2_PRODUCT_TDVX:
+			issues = base_hw_issues_model_tDVx;
+			break;
+		case GPU_ID2_PRODUCT_TNOX:
+			issues = base_hw_issues_model_tNOx;
+			break;
+		case GPU_ID2_PRODUCT_TGOX:
+			issues = base_hw_issues_model_tGOx;
+			break;
+		case GPU_ID2_PRODUCT_TTRX:
+			issues = base_hw_issues_model_tTRx;
+			break;
+		case GPU_ID2_PRODUCT_TNAX:
+			issues = base_hw_issues_model_tNAx;
+			break;
+		case GPU_ID2_PRODUCT_LBEX:
+		case GPU_ID2_PRODUCT_TBEX:
+			issues = base_hw_issues_model_tBEx;
+			break;
+		case GPU_ID2_PRODUCT_TULX:
+			issues = base_hw_issues_model_tULx;
+			break;
+		case GPU_ID2_PRODUCT_TDUX:
+			issues = base_hw_issues_model_tDUx;
+			break;
+		case GPU_ID2_PRODUCT_TODX:
+		case GPU_ID2_PRODUCT_LODX:
+			issues = base_hw_issues_model_tODx;
+			break;
+		case GPU_ID2_PRODUCT_TIDX:
+			issues = base_hw_issues_model_tIDx;
+			break;
+		case GPU_ID2_PRODUCT_TVAX:
+			issues = base_hw_issues_model_tVAx;
+			break;
+		default:
+			dev_err(kbdev->dev,
+				"Unknown GPU ID %x", gpu_id);
+			return -EINVAL;
 		}
 	}
 
-	if (GPU_ID_IS_NEW_FORMAT(product_id)) {
-		dev_info(kbdev->dev,
-			"GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d",
-			(gpu_id & GPU_ID2_PRODUCT_MAJOR) >>
-				GPU_ID2_PRODUCT_MAJOR_SHIFT,
-			(gpu_id & GPU_ID2_ARCH_MAJOR) >>
-				GPU_ID2_ARCH_MAJOR_SHIFT,
-			(gpu_id & GPU_ID2_ARCH_MINOR) >>
-				GPU_ID2_ARCH_MINOR_SHIFT,
-			(gpu_id & GPU_ID2_ARCH_REV) >>
-				GPU_ID2_ARCH_REV_SHIFT,
-			(gpu_id & GPU_ID2_VERSION_MAJOR) >>
-				GPU_ID2_VERSION_MAJOR_SHIFT,
-			(gpu_id & GPU_ID2_VERSION_MINOR) >>
-				GPU_ID2_VERSION_MINOR_SHIFT,
-			(gpu_id & GPU_ID2_VERSION_STATUS) >>
-				GPU_ID2_VERSION_STATUS_SHIFT);
-	} else {
-		dev_info(kbdev->dev,
-			"GPU identified as 0x%04x r%dp%d status %d",
-			(gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
-				GPU_ID_VERSION_PRODUCT_ID_SHIFT,
-			(gpu_id & GPU_ID_VERSION_MAJOR) >>
-				GPU_ID_VERSION_MAJOR_SHIFT,
-			(gpu_id & GPU_ID_VERSION_MINOR) >>
-				GPU_ID_VERSION_MINOR_SHIFT,
-			(gpu_id & GPU_ID_VERSION_STATUS) >>
-				GPU_ID_VERSION_STATUS_SHIFT);
-	}
+	dev_info(kbdev->dev,
+		"GPU identified as 0x%x arch %d.%d.%d r%dp%d status %d",
+		(gpu_id & GPU_ID2_PRODUCT_MAJOR) >>
+			GPU_ID2_PRODUCT_MAJOR_SHIFT,
+		(gpu_id & GPU_ID2_ARCH_MAJOR) >>
+			GPU_ID2_ARCH_MAJOR_SHIFT,
+		(gpu_id & GPU_ID2_ARCH_MINOR) >>
+			GPU_ID2_ARCH_MINOR_SHIFT,
+		(gpu_id & GPU_ID2_ARCH_REV) >>
+			GPU_ID2_ARCH_REV_SHIFT,
+		(gpu_id & GPU_ID2_VERSION_MAJOR) >>
+			GPU_ID2_VERSION_MAJOR_SHIFT,
+		(gpu_id & GPU_ID2_VERSION_MINOR) >>
+			GPU_ID2_VERSION_MINOR_SHIFT,
+		(gpu_id & GPU_ID2_VERSION_STATUS) >>
+			GPU_ID2_VERSION_STATUS_SHIFT);
 
 	for (; *issues != BASE_HW_ISSUE_END; issues++)
 		set_bit(*issues, &kbdev->hw_issues_mask[0]);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
index dde4965..d5e3d3a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -56,4 +56,18 @@
  */
 void kbase_backend_late_term(struct kbase_device *kbdev);
 
+/**
+ * kbase_backend_devfreq_init - Perform backend devfreq related initialization.
+ * @kbdev:      Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_devfreq_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_devfreq_term - Perform backend-devfreq termination.
+ * @kbdev:	Device pointer
+ */
+void kbase_backend_devfreq_term(struct kbase_device *kbdev);
+
 #endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
index 63844d9..62628b61 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -40,7 +40,8 @@
 					struct kbase_gpuprops_regdump *regdump);
 
 /**
- * kbase_backend_gpuprops_get - Fill @regdump with GPU properties read from GPU
+ * kbase_backend_gpuprops_get_features - Fill @regdump with GPU properties read
+ *                                       from GPU
  * @kbdev:   Device pointer
  * @regdump: Pointer to struct kbase_gpuprops_regdump structure
  *
@@ -50,5 +51,17 @@
 void kbase_backend_gpuprops_get_features(struct kbase_device *kbdev,
 					struct kbase_gpuprops_regdump *regdump);
 
+/**
+ * kbase_backend_gpuprops_get_l2_features - Fill @regdump with L2_FEATURES read
+ *                                          from GPU
+ * @kbdev:   Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ *
+ * This function reads the L2_FEATURES register, which is dependent on the
+ * hardware features bitmask. It will power on the GPU if required.
+ */
+void kbase_backend_gpuprops_get_l2_features(struct kbase_device *kbdev,
+					struct kbase_gpuprops_regdump *regdump);
+
 
 #endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
index 0c5ceff..d5b9099 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,28 @@
 #include <mali_kbase_instr_defs.h>
 
 /**
- * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * struct kbase_instr_hwcnt_enable - Enable hardware counter collection.
+ * @dump_buffer:       GPU address to write counters to.
+ * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer.
+ * @jm_bm:             counters selection bitmask (JM).
+ * @shader_bm:         counters selection bitmask (Shader).
+ * @tiler_bm:          counters selection bitmask (Tiler).
+ * @mmu_l2_bm:         counters selection bitmask (MMU_L2).
+ * @use_secondary:     use the secondary performance counter set for
+ *                     applicable counter blocks.
+ */
+struct kbase_instr_hwcnt_enable {
+	u64 dump_buffer;
+	u64 dump_buffer_bytes;
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+	bool use_secondary;
+};
+
+/**
+ * kbase_instr_hwcnt_enable_internal() - Enable HW counters collection
  * @kbdev:	Kbase device
  * @kctx:	Kbase context
  * @enable:	HW counter setup parameters
@@ -43,10 +64,10 @@
  */
 int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 				struct kbase_context *kctx,
-				struct kbase_ioctl_hwcnt_enable *enable);
+				struct kbase_instr_hwcnt_enable *enable);
 
 /**
- * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * kbase_instr_hwcnt_disable_internal() - Disable HW counters collection
  * @kctx: Kbase context
  *
  * Context: might sleep, waiting for an ongoing dump to complete
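
Because kbase_instr_hwcnt_enable_internal() now takes the kernel-internal struct kbase_instr_hwcnt_enable instead of the ioctl structure, in-kernel callers fill the new structure directly. A hedged usage sketch; the GPU dump address and the all-ones bitmasks are placeholders, not values mandated by the driver:

	/* Illustrative caller of the reworked internal API. */
	static int example_enable_hwcnt(struct kbase_device *kbdev,
					struct kbase_context *kctx,
					u64 gpu_dump_va, u64 dump_bytes)
	{
		struct kbase_instr_hwcnt_enable enable = {
			.dump_buffer = gpu_dump_va,
			.dump_buffer_bytes = dump_bytes,
			.jm_bm = 0xffffffff,
			.shader_bm = 0xffffffff,
			.tiler_bm = 0xffffffff,
			.mmu_l2_bm = 0xffffffff,
			.use_secondary = false,
		};

		return kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
	}
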
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
index 580ac987..c3b60e6 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -128,7 +128,7 @@
 						struct kbase_context *kctx);
 
 /**
- * kbase_backend_cacheclean - Perform a cache clean if the given atom requires
+ * kbase_backend_cache_clean - Perform a cache clean if the given atom requires
  *                            one
  * @kbdev:	Device pointer
  * @katom:	Pointer to the failed atom
@@ -136,7 +136,7 @@
  * On some GPUs, the GPU cache must be cleaned following a failed atom. This
  * function performs a clean if it is required by @katom.
  */
-void kbase_backend_cacheclean(struct kbase_device *kbdev,
+void kbase_backend_cache_clean(struct kbase_device *kbdev,
 		struct kbase_jd_atom *katom);
 
 
@@ -160,14 +160,12 @@
  *                                        any scheduling has taken place.
  * @kbdev:         Device pointer
  * @core_req:      Core requirements of atom
- * @coreref_state: Coreref state of atom
  *
  * This function should only be called from kbase_jd_done_worker() or
  * js_return_worker().
  */
 void kbase_backend_complete_wq_post_sched(struct kbase_device *kbdev,
-		base_jd_core_req core_req,
-		enum kbase_atom_coreref_state coreref_state);
+		base_jd_core_req core_req);
 
 /**
  * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
@@ -249,14 +247,16 @@
 		struct kbase_jd_atom *target_katom);
 
 /**
- * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
- *                                        running from a context
+ * kbase_backend_jm_kill_running_jobs_from_kctx - Kill all jobs that are
+ *                               currently running on GPU from a context
  * @kctx: Context pointer
  *
  * This is used in response to a page fault to remove all jobs from the faulting
  * context from the hardware.
+ *
+ * Caller must hold hwaccess_lock.
  */
-void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx);
 
 /**
  * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
@@ -277,86 +277,6 @@
  */
 u32 kbase_backend_get_current_flush_id(struct kbase_device *kbdev);
 
-#if KBASE_GPU_RESET_EN
-/**
- * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
- * @kbdev: Device pointer
- *
- * This function just soft-stops all the slots to ensure that as many jobs as
- * possible are saved.
- *
- * Return: a boolean which should be interpreted as follows:
- * - true  - Prepared for reset, kbase_reset_gpu should be called.
- * - false - Another thread is performing a reset, kbase_reset_gpu should
- *                not be called.
- */
-bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
-
-/**
- * kbase_reset_gpu - Reset the GPU
- * @kbdev: Device pointer
- *
- * This function should be called after kbase_prepare_to_reset_gpu if it returns
- * true. It should never be called without a corresponding call to
- * kbase_prepare_to_reset_gpu.
- *
- * After this function is called (or not called if kbase_prepare_to_reset_gpu
- * returned false), the caller should wait for kbdev->reset_waitq to be
- * signalled to know when the reset has completed.
- */
-void kbase_reset_gpu(struct kbase_device *kbdev);
-
-/**
- * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
- * @kbdev: Device pointer
- *
- * This function just soft-stops all the slots to ensure that as many jobs as
- * possible are saved.
- *
- * Return: a boolean which should be interpreted as follows:
- * - true  - Prepared for reset, kbase_reset_gpu should be called.
- * - false - Another thread is performing a reset, kbase_reset_gpu should
- *                not be called.
- */
-bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
-
-/**
- * kbase_reset_gpu_locked - Reset the GPU
- * @kbdev: Device pointer
- *
- * This function should be called after kbase_prepare_to_reset_gpu if it
- * returns true. It should never be called without a corresponding call to
- * kbase_prepare_to_reset_gpu.
- *
- * After this function is called (or not called if kbase_prepare_to_reset_gpu
- * returned false), the caller should wait for kbdev->reset_waitq to be
- * signalled to know when the reset has completed.
- */
-void kbase_reset_gpu_locked(struct kbase_device *kbdev);
-
-/**
- * kbase_reset_gpu_silent - Reset the GPU silently
- * @kbdev: Device pointer
- *
- * Reset the GPU without trying to cancel jobs and don't emit messages into
- * the kernel log while doing the reset.
- *
- * This function should be used in cases where we are doing a controlled reset
- * of the GPU as part of normal processing (e.g. exiting protected mode) where
- * the driver will have ensured the scheduler has been idled and all other
- * users of the GPU (e.g. instrumentation) have been suspended.
- */
-void kbase_reset_gpu_silent(struct kbase_device *kbdev);
-
-/**
- * kbase_reset_gpu_active - Reports if the GPU is being reset
- * @kbdev: Device pointer
- *
- * Return: True if the GPU is in the process of being reset.
- */
-bool kbase_reset_gpu_active(struct kbase_device *kbdev);
-#endif
-
 /**
  * kbase_job_slot_hardstop - Hard-stop the specified job slot
  * @kctx:         The kbase context that contains the job(s) that should
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
index 4598d80..96c473a 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,22 +44,18 @@
  *
  * Must be called before any other power management function
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  *
- * @return 0 if the power management framework was successfully
- *         initialized.
+ * Return: 0 if the power management framework was successfully initialized.
  */
 int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
 
 /**
  * Terminate the power management framework.
  *
- * No power management functions may be called after this (except
- * @ref kbase_pm_init)
+ * No power management functions may be called after this function returns.
  *
- * @param kbdev The kbase device structure for the device (must be a valid
- *              pointer)
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
  */
 void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
 
@@ -201,14 +197,15 @@
 					const struct kbase_pm_policy *policy);
 
 /**
- * Retrieve a static list of the available policies.
+ * kbase_pm_list_policies - Retrieve a static list of the available policies.
  *
- * @param[out] policies An array pointer to take the list of policies. This may
- *                      be NULL. The contents of this array must not be
- *                      modified.
+ * @kbdev:   The kbase device structure for the device.
+ * @list:    An array pointer to take the list of policies. This may be NULL.
+ *           The contents of this array must not be modified.
  *
- * @return The number of policies
+ * Return: The number of policies
  */
-int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+int kbase_pm_list_policies(struct kbase_device *kbdev,
+	const struct kbase_pm_policy * const **list);
 
 #endif /* _KBASE_HWACCESS_PM_H_ */
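
Callers of kbase_pm_list_policies() now pass the device pointer and receive the static policy list through the second argument. A hedged usage sketch, with example_dump_policy_count() as a hypothetical helper:

	/* Illustrative caller of the updated kbase_pm_list_policies(). */
	static void example_dump_policy_count(struct kbase_device *kbdev)
	{
		const struct kbase_pm_policy *const *policies;
		int count;

		count = kbase_pm_list_policies(kbdev, &policies);
		dev_info(kbdev->dev, "%d power policies available (list at %p)\n",
			 count, policies);
	}
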
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
index 9b86b51..f7539f5 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -51,7 +51,11 @@
  *
  * This function is only in use for BASE_HW_ISSUE_6367
  */
-#ifndef CONFIG_MALI_NO_MALI
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_device *kbdev)
+{
+}
+#else
 void kbase_wait_write_flush(struct kbase_device *kbdev);
 #endif
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c
new file mode 100644
index 0000000..265fc21
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt.c
@@ -0,0 +1,807 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Implementation of hardware counter context and accumulator APIs.
+ */
+
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_accumulator.h"
+#include "mali_kbase_hwcnt_backend.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+/**
+ * enum kbase_hwcnt_accum_state - Hardware counter accumulator states.
+ * @ACCUM_STATE_ERROR:    Error state, where all accumulator operations fail.
+ * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled.
+ * @ACCUM_STATE_ENABLED:  Enabled state, where dumping is enabled if there are
+ *                        any enabled counters.
+ */
+enum kbase_hwcnt_accum_state {
+	ACCUM_STATE_ERROR,
+	ACCUM_STATE_DISABLED,
+	ACCUM_STATE_ENABLED
+};
+
+/**
+ * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
+ * @backend:                Pointer to created counter backend.
+ * @state:                  The current state of the accumulator.
+ *                           - State transition from disabled->enabled or
+ *                             disabled->error requires state_lock.
+ *                           - State transition from enabled->disabled or
+ *                             enabled->error requires both accum_lock and
+ *                             state_lock.
+ *                           - Error state persists until next disable.
+ * @enable_map:             The current set of enabled counters.
+ *                           - Must only be modified while holding both
+ *                             accum_lock and state_lock.
+ *                           - Can be read while holding either lock.
+ *                           - Must stay in sync with enable_map_any_enabled.
+ * @enable_map_any_enabled: True if any counters in the map are enabled, else
+ *                          false. If true, and state is ACCUM_STATE_ENABLED,
+ *                          then the counter backend will be enabled.
+ *                           - Must only be modified while holding both
+ *                             accum_lock and state_lock.
+ *                           - Can be read while holding either lock.
+ *                           - Must stay in sync with enable_map.
+ * @scratch_map:            Scratch enable map, used as temporary enable map
+ *                          storage during dumps.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ * @accum_buf:              Accumulation buffer into which dumps are
+ *                          accumulated on transition to a disabled state.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ * @accumulated:            True if the accumulation buffer has been accumulated
+ *                          into and not subsequently read from yet, else false.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ * @ts_last_dump_ns:        Timestamp (ns) of the end time of the most recent
+ *                          dump that was requested by the user.
+ *                           - Must only be read or modified while holding
+ *                             accum_lock.
+ */
+struct kbase_hwcnt_accumulator {
+	struct kbase_hwcnt_backend *backend;
+	enum kbase_hwcnt_accum_state state;
+	struct kbase_hwcnt_enable_map enable_map;
+	bool enable_map_any_enabled;
+	struct kbase_hwcnt_enable_map scratch_map;
+	struct kbase_hwcnt_dump_buffer accum_buf;
+	bool accumulated;
+	u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_context - Hardware counter context structure.
+ * @iface:         Pointer to hardware counter backend interface.
+ * @state_lock:    Spinlock protecting state.
+ * @disable_count: Disable count of the context. Initialised to 1.
+ *                 Decremented when the accumulator is acquired, and incremented
+ *                 on release. Incremented on calls to
+ *                 kbase_hwcnt_context_disable[_atomic], and decremented on
+ *                 calls to kbase_hwcnt_context_enable.
+ *                  - Must only be read or modified while holding state_lock.
+ * @accum_lock:    Mutex protecting accumulator.
+ * @accum_inited:  Flag to prevent concurrent accumulator initialisation and/or
+ *                 termination. Set to true before accumulator initialisation,
+ *                 and false after accumulator termination.
+ *                  - Must only be modified while holding both accum_lock and
+ *                    state_lock.
+ *                  - Can be read while holding either lock.
+ * @accum:         Hardware counter accumulator structure.
+ */
+struct kbase_hwcnt_context {
+	const struct kbase_hwcnt_backend_interface *iface;
+	spinlock_t state_lock;
+	size_t disable_count;
+	struct mutex accum_lock;
+	bool accum_inited;
+	struct kbase_hwcnt_accumulator accum;
+};
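+
+/*
+ * Lock ordering note: wherever both locks are held in this file, accum_lock
+ * (mutex) is taken before state_lock (spinlock), and state_lock is always
+ * taken with spin_lock_irqsave().
+ */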
+
+int kbase_hwcnt_context_init(
+	const struct kbase_hwcnt_backend_interface *iface,
+	struct kbase_hwcnt_context **out_hctx)
+{
+	struct kbase_hwcnt_context *hctx = NULL;
+
+	if (!iface || !out_hctx)
+		return -EINVAL;
+
+	hctx = kzalloc(sizeof(*hctx), GFP_KERNEL);
+	if (!hctx)
+		return -ENOMEM;
+
+	hctx->iface = iface;
+	spin_lock_init(&hctx->state_lock);
+	hctx->disable_count = 1;
+	mutex_init(&hctx->accum_lock);
+	hctx->accum_inited = false;
+
+	*out_hctx = hctx;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_init);
+
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx)
+{
+	if (!hctx)
+		return;
+
+	/* Make sure we didn't leak the accumulator */
+	WARN_ON(hctx->accum_inited);
+	kfree(hctx);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_term);
+
+/**
+ * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx)
+{
+	WARN_ON(!hctx);
+	WARN_ON(!hctx->accum_inited);
+
+	kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map);
+	kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf);
+	kbase_hwcnt_enable_map_free(&hctx->accum.enable_map);
+	hctx->iface->term(hctx->accum.backend);
+	memset(&hctx->accum, 0, sizeof(hctx->accum));
+}
+
+/**
+ * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
+{
+	int errcode;
+
+	WARN_ON(!hctx);
+	WARN_ON(!hctx->accum_inited);
+
+	errcode = hctx->iface->init(
+		hctx->iface->info, &hctx->accum.backend);
+	if (errcode)
+		goto error;
+
+	hctx->accum.state = ACCUM_STATE_ERROR;
+
+	errcode = kbase_hwcnt_enable_map_alloc(
+		hctx->iface->metadata, &hctx->accum.enable_map);
+	if (errcode)
+		goto error;
+
+	hctx->accum.enable_map_any_enabled = false;
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(
+		hctx->iface->metadata, &hctx->accum.accum_buf);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_enable_map_alloc(
+		hctx->iface->metadata, &hctx->accum.scratch_map);
+	if (errcode)
+		goto error;
+
+	hctx->accum.accumulated = false;
+
+	hctx->accum.ts_last_dump_ns =
+		hctx->iface->timestamp_ns(hctx->accum.backend);
+
+	return 0;
+
+error:
+	kbasep_hwcnt_accumulator_term(hctx);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the
+ *                                      disabled state, from the enabled or
+ *                                      error states.
+ * @hctx:       Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_accumulator_disable(
+	struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+	int errcode = 0;
+	bool backend_enabled = false;
+	struct kbase_hwcnt_accumulator *accum;
+	unsigned long flags;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->accum_lock);
+	WARN_ON(!hctx->accum_inited);
+
+	accum = &hctx->accum;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	WARN_ON(hctx->disable_count != 0);
+	WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
+
+	if ((hctx->accum.state == ACCUM_STATE_ENABLED) &&
+	    (accum->enable_map_any_enabled))
+		backend_enabled = true;
+
+	if (!backend_enabled)
+		hctx->accum.state = ACCUM_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	/* Early out if the backend is not already enabled */
+	if (!backend_enabled)
+		return;
+
+	if (!accumulate)
+		goto disable;
+
+	/* Try and accumulate before disabling */
+	errcode = hctx->iface->dump_request(accum->backend);
+	if (errcode)
+		goto disable;
+
+	errcode = hctx->iface->dump_wait(accum->backend);
+	if (errcode)
+		goto disable;
+
+	errcode = hctx->iface->dump_get(accum->backend,
+		&accum->accum_buf, &accum->enable_map, accum->accumulated);
+	if (errcode)
+		goto disable;
+
+	accum->accumulated = true;
+
+disable:
+	hctx->iface->dump_disable(accum->backend);
+
+	/* Regardless of any errors during the accumulate, put the accumulator
+	 * in the disabled state.
+	 */
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	hctx->accum.state = ACCUM_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+
+/**
+ * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the
+ *                                     enabled state, from the disabled state.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
+{
+	int errcode = 0;
+	struct kbase_hwcnt_accumulator *accum;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->state_lock);
+	WARN_ON(!hctx->accum_inited);
+	WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED);
+
+	accum = &hctx->accum;
+
+	/* The backend only needs enabling if any counters are enabled */
+	if (accum->enable_map_any_enabled)
+		errcode = hctx->iface->dump_enable_nolock(
+			accum->backend, &accum->enable_map);
+
+	if (!errcode)
+		accum->state = ACCUM_STATE_ENABLED;
+	else
+		accum->state = ACCUM_STATE_ERROR;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date
+ *                                   values of enabled counters possible, and
+ *                                   optionally update the set of enabled
+ *                                   counters.
+ * @hctx:        Non-NULL pointer to the hardware counter context
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ * @new_map:     Pointer to the new counter enable map. If non-NULL, must have
+ *               the same metadata as the accumulator. If NULL, the set of
+ *               enabled counters will be unchanged.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_context *hctx,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf,
+	const struct kbase_hwcnt_enable_map *new_map)
+{
+	int errcode = 0;
+	unsigned long flags;
+	enum kbase_hwcnt_accum_state state;
+	bool dump_requested = false;
+	bool dump_written = false;
+	bool cur_map_any_enabled;
+	struct kbase_hwcnt_enable_map *cur_map;
+	bool new_map_any_enabled = false;
+	u64 dump_time_ns;
+	struct kbase_hwcnt_accumulator *accum;
+
+	WARN_ON(!hctx);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hctx->iface->metadata));
+	WARN_ON(new_map && (new_map->metadata != hctx->iface->metadata));
+	WARN_ON(!hctx->accum_inited);
+	lockdep_assert_held(&hctx->accum_lock);
+
+	accum = &hctx->accum;
+	cur_map = &accum->scratch_map;
+
+	/* Save out info about the current enable map */
+	cur_map_any_enabled = accum->enable_map_any_enabled;
+	kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
+
+	if (new_map)
+		new_map_any_enabled =
+			kbase_hwcnt_enable_map_any_enabled(new_map);
+
+	/*
+	 * We're holding accum_lock, so the accumulator state might transition
+	 * from disabled to enabled during this function (as enabling is lock
+	 * free), but it will never disable (as disabling needs to hold the
+	 * accum_lock), nor will it ever transition from enabled to error (as
+	 * an enable while we're already enabled is impossible).
+	 *
+	 * If we're already disabled, we'll only look at the accumulation buffer
+	 * rather than do a real dump, so a concurrent enable does not affect
+	 * us.
+	 *
+	 * If a concurrent enable fails, we might transition to the error
+	 * state, but again, as we're only looking at the accumulation buffer,
+	 * it's not an issue.
+	 */
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	state = accum->state;
+
+	/*
+	 * Update the new map now, such that if an enable occurs during this
+	 * dump then that enable will set the new map. If we're already enabled,
+	 * then we'll do it ourselves after the dump.
+	 */
+	if (new_map) {
+		kbase_hwcnt_enable_map_copy(
+			&accum->enable_map, new_map);
+		accum->enable_map_any_enabled = new_map_any_enabled;
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	/* Error state, so early out. No need to roll back any map updates */
+	if (state == ACCUM_STATE_ERROR)
+		return -EIO;
+
+	/* Initiate the dump if the backend is enabled. */
+	if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
+		/* Disable pre-emption, to make the timestamp as accurate as
+		 * possible.
+		 */
+		preempt_disable();
+		{
+			dump_time_ns = hctx->iface->timestamp_ns(
+				accum->backend);
+			if (dump_buf) {
+				errcode = hctx->iface->dump_request(
+					accum->backend);
+				dump_requested = true;
+			} else {
+				errcode = hctx->iface->dump_clear(
+					accum->backend);
+			}
+		}
+		preempt_enable();
+		if (errcode)
+			goto error;
+	} else {
+		dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+	}
+
+	/* Copy any accumulation into the dest buffer */
+	if (accum->accumulated && dump_buf) {
+		kbase_hwcnt_dump_buffer_copy(
+			dump_buf, &accum->accum_buf, cur_map);
+		dump_written = true;
+	}
+
+	/* Wait for any requested dumps to complete */
+	if (dump_requested) {
+		WARN_ON(state != ACCUM_STATE_ENABLED);
+		errcode = hctx->iface->dump_wait(accum->backend);
+		if (errcode)
+			goto error;
+	}
+
+	/* If we're enabled and there's a new enable map, change the enabled set
+	 * as soon after the dump has completed as possible.
+	 */
+	if ((state == ACCUM_STATE_ENABLED) && new_map) {
+		/* Backend is only enabled if there were any enabled counters */
+		if (cur_map_any_enabled)
+			hctx->iface->dump_disable(accum->backend);
+
+		/* (Re-)enable the backend if the new map has enabled counters.
+		 * No need to acquire the spinlock, as concurrent enable while
+		 * we're already enabled and holding accum_lock is impossible.
+		 */
+		if (new_map_any_enabled) {
+			errcode = hctx->iface->dump_enable(
+				accum->backend, new_map);
+			if (errcode)
+				goto error;
+		}
+	}
+
+	/* Copy, accumulate, or zero into the dest buffer to finish */
+	if (dump_buf) {
+		/* If we dumped, copy or accumulate it into the destination */
+		if (dump_requested) {
+			WARN_ON(state != ACCUM_STATE_ENABLED);
+			errcode = hctx->iface->dump_get(
+				accum->backend,
+				dump_buf,
+				cur_map,
+				dump_written);
+			if (errcode)
+				goto error;
+			dump_written = true;
+		}
+
+		/* If we've not written anything into the dump buffer so far, it
+		 * means there was nothing to write. Zero any enabled counters.
+		 */
+		if (!dump_written)
+			kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+	}
+
+	/* Write out timestamps */
+	*ts_start_ns = accum->ts_last_dump_ns;
+	*ts_end_ns = dump_time_ns;
+
+	accum->accumulated = false;
+	accum->ts_last_dump_ns = dump_time_ns;
+
+	return 0;
+error:
+	/* An error was only physically possible if the backend was enabled */
+	WARN_ON(state != ACCUM_STATE_ENABLED);
+
+	/* Disable the backend, and transition to the error state */
+	hctx->iface->dump_disable(accum->backend);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	accum->state = ACCUM_STATE_ERROR;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx:       Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_context_disable(
+	struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+	unsigned long flags;
+
+	WARN_ON(!hctx);
+	lockdep_assert_held(&hctx->accum_lock);
+
+	if (!kbase_hwcnt_context_disable_atomic(hctx)) {
+		kbasep_hwcnt_accumulator_disable(hctx, accumulate);
+
+		spin_lock_irqsave(&hctx->state_lock, flags);
+
+		/* Atomic disable failed and we're holding the mutex, so current
+		 * disable count must be 0.
+		 */
+		WARN_ON(hctx->disable_count != 0);
+		hctx->disable_count++;
+
+		spin_unlock_irqrestore(&hctx->state_lock, flags);
+	}
+}
+
+int kbase_hwcnt_accumulator_acquire(
+	struct kbase_hwcnt_context *hctx,
+	struct kbase_hwcnt_accumulator **accum)
+{
+	int errcode = 0;
+	unsigned long flags;
+
+	if (!hctx || !accum)
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!hctx->accum_inited)
+		/* Set accum_inited now to prevent concurrent init */
+		hctx->accum_inited = true;
+	else
+		/* Already have an accum, or already being inited */
+		errcode = -EBUSY;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+	mutex_unlock(&hctx->accum_lock);
+
+	if (errcode)
+		return errcode;
+
+	errcode = kbasep_hwcnt_accumulator_init(hctx);
+
+	if (errcode) {
+		mutex_lock(&hctx->accum_lock);
+		spin_lock_irqsave(&hctx->state_lock, flags);
+
+		hctx->accum_inited = false;
+
+		spin_unlock_irqrestore(&hctx->state_lock, flags);
+		mutex_unlock(&hctx->accum_lock);
+
+		return errcode;
+	}
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	WARN_ON(hctx->disable_count == 0);
+	WARN_ON(hctx->accum.enable_map_any_enabled);
+
+	/* Decrement the disable count to allow the accumulator to be accessible
+	 * now that it's fully constructed.
+	 */
+	hctx->disable_count--;
+
+	/*
+	 * Make sure the accumulator is initialised to the correct state.
+	 * Regardless of initial state, counters don't need to be enabled via
+	 * the backend, as the initial enable map has no enabled counters.
+	 */
+	hctx->accum.state = (hctx->disable_count == 0) ?
+		ACCUM_STATE_ENABLED :
+		ACCUM_STATE_DISABLED;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	*accum = &hctx->accum;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_acquire);
+
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
+{
+	unsigned long flags;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum)
+		return;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	mutex_lock(&hctx->accum_lock);
+
+	/* Double release is a programming error */
+	WARN_ON(!hctx->accum_inited);
+
+	/* Disable the context to ensure the accumulator is inaccessible while
+	 * we're destroying it. This performs the corresponding disable count
+	 * increment to the decrement done during acquisition.
+	 */
+	kbasep_hwcnt_context_disable(hctx, false);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	kbasep_hwcnt_accumulator_term(hctx);
+
+	mutex_lock(&hctx->accum_lock);
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	hctx->accum_inited = false;
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+	mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_release);
+
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
+{
+	if (WARN_ON(!hctx))
+		return;
+
+	/* Try and atomically disable first, so we can avoid locking the mutex
+	 * if we don't need to.
+	 */
+	if (kbase_hwcnt_context_disable_atomic(hctx))
+		return;
+
+	mutex_lock(&hctx->accum_lock);
+
+	kbasep_hwcnt_context_disable(hctx, true);
+
+	mutex_unlock(&hctx->accum_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable);
+
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
+{
+	unsigned long flags;
+	bool atomic_disabled = false;
+
+	if (WARN_ON(!hctx))
+		return false;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!WARN_ON(hctx->disable_count == SIZE_MAX)) {
+		/*
+		 * If disable count is non-zero or no counters are enabled, we
+		 * can just bump the disable count.
+		 *
+		 * Otherwise, we can't disable in an atomic context.
+		 */
+		if (hctx->disable_count != 0) {
+			hctx->disable_count++;
+			atomic_disabled = true;
+		} else {
+			WARN_ON(!hctx->accum_inited);
+			if (!hctx->accum.enable_map_any_enabled) {
+				hctx->disable_count++;
+				hctx->accum.state = ACCUM_STATE_DISABLED;
+				atomic_disabled = true;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+	return atomic_disabled;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_disable_atomic);
+
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
+{
+	unsigned long flags;
+
+	if (WARN_ON(!hctx))
+		return;
+
+	spin_lock_irqsave(&hctx->state_lock, flags);
+
+	if (!WARN_ON(hctx->disable_count == 0)) {
+		if (hctx->disable_count == 1)
+			kbasep_hwcnt_accumulator_enable(hctx);
+
+		hctx->disable_count--;
+	}
+
+	spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_enable);
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
+	struct kbase_hwcnt_context *hctx)
+{
+	if (!hctx)
+		return NULL;
+
+	return hctx->iface->metadata;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_context_metadata);
+
+int kbase_hwcnt_accumulator_set_counters(
+	struct kbase_hwcnt_accumulator *accum,
+	const struct kbase_hwcnt_enable_map *new_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum || !new_map || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	if ((new_map->metadata != hctx->iface->metadata) ||
+	    (dump_buf && (dump_buf->metadata != hctx->iface->metadata)))
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+
+	errcode = kbasep_hwcnt_accumulator_dump(
+		hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_set_counters);
+
+int kbase_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_accumulator *accum,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_context *hctx;
+
+	if (!accum || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+	if (dump_buf && (dump_buf->metadata != hctx->iface->metadata))
+		return -EINVAL;
+
+	mutex_lock(&hctx->accum_lock);
+
+	errcode = kbasep_hwcnt_accumulator_dump(
+		hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
+
+	mutex_unlock(&hctx->accum_lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_accumulator_dump);
+
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum)
+{
+	struct kbase_hwcnt_context *hctx;
+
+	if (WARN_ON(!accum))
+		return 0;
+
+	hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+	return hctx->iface->timestamp_ns(accum->backend);
+}
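For orientation, the intended call sequence against the accumulator implemented above is roughly as follows. This is an illustrative sketch rather than code from the patch: the context is assumed to be already initialised, and the enable-map population helpers referenced in the comment live in mali_kbase_hwcnt_types.h, outside this excerpt.

#include "mali_kbase_hwcnt_accumulator.h"
#include "mali_kbase_hwcnt_context.h"
#include "mali_kbase_hwcnt_types.h"

static int hwcnt_example_dump_once(struct kbase_hwcnt_context *hctx)
{
	const struct kbase_hwcnt_metadata *md =
		kbase_hwcnt_context_metadata(hctx);
	struct kbase_hwcnt_accumulator *accum;
	struct kbase_hwcnt_enable_map map;
	struct kbase_hwcnt_dump_buffer buf;
	u64 ts_start, ts_end;
	int err;

	/* At most one accumulator may exist per context at any time. */
	err = kbase_hwcnt_accumulator_acquire(hctx, &accum);
	if (err)
		return err;

	err = kbase_hwcnt_enable_map_alloc(md, &map);
	if (err)
		goto release;

	err = kbase_hwcnt_dump_buffer_alloc(md, &buf);
	if (err)
		goto free_map;

	/* Populate 'map' with the counters of interest here (enable map
	 * helpers are not part of this excerpt). Acquisition enables nothing,
	 * so set_counters is what actually turns counters on.
	 */
	err = kbase_hwcnt_accumulator_set_counters(
		accum, &map, &ts_start, &ts_end, NULL);
	if (!err)
		err = kbase_hwcnt_accumulator_dump(
			accum, &ts_start, &ts_end, &buf);

	kbase_hwcnt_dump_buffer_free(&buf);
free_map:
	kbase_hwcnt_enable_map_free(&map);
release:
	kbase_hwcnt_accumulator_release(accum);
	return err;
}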
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h
new file mode 100644
index 0000000..eb82ea4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_accumulator.h
@@ -0,0 +1,146 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter accumulator API.
+ */
+
+#ifndef _KBASE_HWCNT_ACCUMULATOR_H_
+#define _KBASE_HWCNT_ACCUMULATOR_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_accumulator;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator
+ *                                     for a hardware counter context.
+ * @hctx:  Non-NULL pointer to a hardware counter context.
+ * @accum: Non-NULL pointer to where the pointer to the created accumulator
+ *         will be stored on success.
+ *
+ * There can exist at most one instance of the hardware counter accumulator per
+ * context at a time.
+ *
+ * If multiple clients need access to the hardware counters at the same time,
+ * then an abstraction built on top of the single instance to the hardware
+ * counter accumulator is required.
+ *
+ * No counters will be enabled with the returned accumulator. A subsequent call
+ * to kbase_hwcnt_accumulator_set_counters must be used to turn them on.
+ *
+ * There are four components to a hardware counter dump:
+ *  - A set of enabled counters
+ *  - A start time
+ *  - An end time
+ *  - A dump buffer containing the accumulated counter values for all enabled
+ *    counters between the start and end times.
+ *
+ * For each dump, it is guaranteed that all enabled counters were active for the
+ * entirety of the period between the start and end times.
+ *
+ * It is also guaranteed that the start time of dump "n" is always equal to the
+ * end time of dump "n - 1".
+ *
+ * For all dumps, the values of any counters that were not enabled are undefined.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_acquire(
+	struct kbase_hwcnt_context *hctx,
+	struct kbase_hwcnt_accumulator **accum);
+
+/**
+ * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * The accumulator must be released before the context the accumulator was
+ * created from is terminated.
+ */
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
+
+/**
+ * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently
+ *                                          enabled counters, and enable a new
+ *                                          set of counters that will be used
+ *                                          for subsequent dumps.
+ * @accum:       Non-NULL pointer to the hardware counter accumulator.
+ * @new_map:     Non-NULL pointer to the new counter enable map. Must have the
+ *               same metadata as the accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_set_counters(
+	struct kbase_hwcnt_accumulator *accum,
+	const struct kbase_hwcnt_enable_map *new_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
+ *                                  counters.
+ * @accum:       Non-NULL pointer to the hardware counter accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_dump(
+	struct kbase_hwcnt_accumulator *accum,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend
+ *                                          timestamp.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * Return: Accumulator backend timestamp in nanoseconds.
+ */
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum);
+
+#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */
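The interval guarantee documented above (each dump starts exactly where the previous one ended) is what lets periodic dumps be stitched into a contiguous timeline. A small illustrative loop, assuming counters were already enabled via kbase_hwcnt_accumulator_set_counters():

static void hwcnt_example_periodic(struct kbase_hwcnt_accumulator *accum,
				   struct kbase_hwcnt_dump_buffer *buf)
{
	u64 ts_start, ts_end, prev_end = 0;
	int i;

	for (i = 0; i < 4; i++) {
		if (kbase_hwcnt_accumulator_dump(accum, &ts_start, &ts_end,
						 buf))
			break;
		/* From the second dump onwards, each interval starts exactly
		 * where the previous one ended.
		 */
		WARN_ON(i > 0 && ts_start != prev_end);
		prev_end = ts_end;
	}
}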
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h
new file mode 100644
index 0000000..b7aa0e1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend.h
@@ -0,0 +1,217 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter backends.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_H_
+#define _KBASE_HWCNT_BACKEND_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/*
+ * struct kbase_hwcnt_backend_info - Opaque pointer to information used to
+ *                                   create an instance of a hardware counter
+ *                                   backend.
+ */
+struct kbase_hwcnt_backend_info;
+
+/*
+ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter backend,
+ *                              used to perform dumps.
+ */
+struct kbase_hwcnt_backend;
+
+/**
+ * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
+ * @info:        Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * All uses of the created hardware counter backend must be externally
+ * synchronised.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_init_fn)(
+	const struct kbase_hwcnt_backend_info *info,
+	struct kbase_hwcnt_backend **out_backend);
+
+/**
+ * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
+ * @backend: Pointer to backend to be terminated.
+ */
+typedef void (*kbase_hwcnt_backend_term_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend
+ *                                               timestamp.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * Return: Backend timestamp in nanoseconds.
+ */
+typedef u64 (*kbase_hwcnt_backend_timestamp_ns_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
+ *                                              backend.
+ * @backend:    Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * The enable_map must have been created using the interface's metadata.
+ * If the backend has already been enabled, an error is returned.
+ *
+ * May be called in an atomic context.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_enable_fn)(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
+ *                                                     with the backend.
+ * @backend:    Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be
+ * called in an atomic context with the spinlock documented by the specific
+ * backend interface held.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_enable_nolock_fn)(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
+ *                                               the backend.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is already disabled, does nothing.
+ * Any undumped counter values since the last dump get will be lost.
+ */
+typedef void (*kbase_hwcnt_backend_dump_disable_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
+ *                                             counters.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_clear_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
+ *                                               dump.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled or another dump is already in progress,
+ * returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_request_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
+ *                                            counter dump has completed.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_wait_fn)(
+	struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate the counters
+ *                                           dumped after the last dump request
+ *                                           into the dump buffer.
+ * @backend:     Non-NULL pointer to backend.
+ * @dump_buffer: Non-NULL pointer to destination dump buffer.
+ * @enable_map:  Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate:  True if counters should be accumulated into dump_buffer, rather
+ *               than copied.
+ *
+ * If the backend is not enabled, returns an error.
+ * If a dump is in progress (i.e. dump_wait has not yet returned successfully)
+ * then the resultant contents of the dump buffer will be undefined.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int (*kbase_hwcnt_backend_dump_get_fn)(
+	struct kbase_hwcnt_backend *backend,
+	struct kbase_hwcnt_dump_buffer *dump_buffer,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	bool accumulate);
+
+/**
+ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
+ *                                        interface.
+ * @metadata:           Immutable hardware counter metadata.
+ * @info:               Immutable info used to initialise an instance of the
+ *                      backend.
+ * @init:               Function ptr to initialise an instance of the backend.
+ * @term:               Function ptr to terminate an instance of the backend.
+ * @timestamp_ns:       Function ptr to get the current backend timestamp.
+ * @dump_enable:        Function ptr to enable dumping.
+ * @dump_enable_nolock: Function ptr to enable dumping while the
+ *                      backend-specific spinlock is already held.
+ * @dump_disable:       Function ptr to disable dumping.
+ * @dump_clear:         Function ptr to clear counters.
+ * @dump_request:       Function ptr to request a dump.
+ * @dump_wait:          Function ptr to wait for a dump to complete.
+ * @dump_get:           Function ptr to copy or accumulate dump into a dump
+ *                      buffer.
+ */
+struct kbase_hwcnt_backend_interface {
+	const struct kbase_hwcnt_metadata *metadata;
+	const struct kbase_hwcnt_backend_info *info;
+	kbase_hwcnt_backend_init_fn init;
+	kbase_hwcnt_backend_term_fn term;
+	kbase_hwcnt_backend_timestamp_ns_fn timestamp_ns;
+	kbase_hwcnt_backend_dump_enable_fn dump_enable;
+	kbase_hwcnt_backend_dump_enable_nolock_fn dump_enable_nolock;
+	kbase_hwcnt_backend_dump_disable_fn dump_disable;
+	kbase_hwcnt_backend_dump_clear_fn dump_clear;
+	kbase_hwcnt_backend_dump_request_fn dump_request;
+	kbase_hwcnt_backend_dump_wait_fn dump_wait;
+	kbase_hwcnt_backend_dump_get_fn dump_get;
+};
+
+#endif /* _KBASE_HWCNT_BACKEND_H_ */
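To make the contract concrete, the sequence below mirrors how the accumulator code earlier in this patch drives a backend for a single dump. Illustrative only; return values of some dump steps are ignored for brevity.

#include "mali_kbase_hwcnt_backend.h"

static int hwcnt_example_backend_dump(
	const struct kbase_hwcnt_backend_interface *iface,
	const struct kbase_hwcnt_enable_map *enable_map,
	struct kbase_hwcnt_dump_buffer *buf)
{
	struct kbase_hwcnt_backend *backend;
	int err;

	err = iface->init(iface->info, &backend);
	if (err)
		return err;

	err = iface->dump_enable(backend, enable_map);
	if (err)
		goto term;

	if (!iface->dump_request(backend) && !iface->dump_wait(backend)) {
		/* false: copy into 'buf' rather than accumulate */
		err = iface->dump_get(backend, buf, enable_map, false);
	}

	iface->dump_disable(backend);
term:
	iface->term(backend);
	return err;
}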
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c
new file mode 100644
index 0000000..1e9c25a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.c
@@ -0,0 +1,511 @@
+/*
+ *
+ * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_backend_gpu.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_hwaccess_instr.h"
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+/**
+ * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance
+ *                                       of a GPU hardware counter backend.
+ * @kbdev:         KBase device.
+ * @use_secondary: True if secondary performance counters should be used,
+ *                 else false. Ignored if secondary counters are not supported.
+ * @metadata:      Hardware counter metadata.
+ * @dump_bytes:    Bytes of GPU memory required to perform a
+ *                 hardware counter dump.
+ */
+struct kbase_hwcnt_backend_gpu_info {
+	struct kbase_device *kbdev;
+	bool use_secondary;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend.
+ * @info:         Info used to create the backend.
+ * @kctx:         KBase context used for GPU memory allocation and
+ *                counter dumping.
+ * @gpu_dump_va:  GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va:  CPU mapping of gpu_dump_va.
+ * @vmap:         Dump buffer vmap.
+ * @enabled:      True if dumping has been enabled, else false.
+ * @pm_core_mask: PM state synchronised shader core mask for the enabled dump.
+ */
+struct kbase_hwcnt_backend_gpu {
+	const struct kbase_hwcnt_backend_gpu_info *info;
+	struct kbase_context *kctx;
+	u64 gpu_dump_va;
+	void *cpu_dump_va;
+	struct kbase_vmap_struct *vmap;
+	bool enabled;
+	u64 pm_core_mask;
+};
+
+/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_gpu_timestamp_ns(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct timespec ts;
+
+	(void)backend;
+	getrawmonotonic(&ts);
+	return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int kbasep_hwcnt_backend_gpu_dump_enable_nolock(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_hwcnt_physical_enable_map phys;
+	struct kbase_instr_hwcnt_enable enable;
+
+	if (!backend_gpu || !enable_map || backend_gpu->enabled ||
+	    (enable_map->metadata != backend_gpu->info->metadata))
+		return -EINVAL;
+
+	kctx = backend_gpu->kctx;
+	kbdev = backend_gpu->kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+
+	enable.jm_bm = phys.jm_bm;
+	enable.shader_bm = phys.shader_bm;
+	enable.tiler_bm = phys.tiler_bm;
+	enable.mmu_l2_bm = phys.mmu_l2_bm;
+	enable.use_secondary = backend_gpu->info->use_secondary;
+	enable.dump_buffer = backend_gpu->gpu_dump_va;
+	enable.dump_buffer_bytes = backend_gpu->info->dump_bytes;
+
+	errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+	if (errcode)
+		goto error;
+
+	backend_gpu->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
+	backend_gpu->enabled = true;
+
+	return 0;
+error:
+	return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_gpu_dump_enable(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	unsigned long flags;
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+	struct kbase_device *kbdev;
+
+	if (!backend_gpu)
+		return -EINVAL;
+
+	kbdev = backend_gpu->kctx->kbdev;
+
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+	errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock(
+		backend, enable_map);
+
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_gpu_dump_disable(
+	struct kbase_hwcnt_backend *backend)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (WARN_ON(!backend_gpu) || !backend_gpu->enabled)
+		return;
+
+	errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx);
+	WARN_ON(errcode);
+
+	backend_gpu->enabled = false;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_gpu_dump_clear(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !backend_gpu->enabled)
+		return -EINVAL;
+
+	return kbase_instr_hwcnt_clear(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_gpu_dump_request(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !backend_gpu->enabled)
+		return -EINVAL;
+
+	return kbase_instr_hwcnt_request_dump(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_gpu_dump_wait(
+	struct kbase_hwcnt_backend *backend)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !backend_gpu->enabled)
+		return -EINVAL;
+
+	return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx);
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_gpu_dump_get(
+	struct kbase_hwcnt_backend *backend,
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map,
+	bool accumulate)
+{
+	struct kbase_hwcnt_backend_gpu *backend_gpu =
+		(struct kbase_hwcnt_backend_gpu *)backend;
+
+	if (!backend_gpu || !dst || !dst_enable_map ||
+	    (backend_gpu->info->metadata != dst->metadata) ||
+	    (dst_enable_map->metadata != dst->metadata))
+		return -EINVAL;
+
+	/* Invalidate the kernel buffer before reading from it. */
+	kbase_sync_mem_regions(
+		backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU);
+
+	return kbase_hwcnt_gpu_dump_get(
+		dst, backend_gpu->cpu_dump_va, dst_enable_map,
+		backend_gpu->pm_core_mask, accumulate);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer.
+ * @info:        Non-NULL pointer to GPU backend info.
+ * @kctx:        Non-NULL pointer to kbase context.
+ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
+ *               is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_dump_alloc(
+	const struct kbase_hwcnt_backend_gpu_info *info,
+	struct kbase_context *kctx,
+	u64 *gpu_dump_va)
+{
+	struct kbase_va_region *reg;
+	u64 flags;
+	u64 nr_pages;
+
+	WARN_ON(!info);
+	WARN_ON(!kctx);
+	WARN_ON(!gpu_dump_va);
+
+	flags = BASE_MEM_PROT_CPU_RD |
+		BASE_MEM_PROT_GPU_WR |
+		BASEP_MEM_PERMANENT_KERNEL_MAPPING |
+		BASE_MEM_CACHED_CPU;
+
+	if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE)
+		flags |= BASE_MEM_UNCACHED_GPU;
+
+	nr_pages = PFN_UP(info->dump_bytes);
+
+	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va);
+
+	if (!reg)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer.
+ * @kctx:        Non-NULL pointer to kbase context.
+ * @gpu_dump_va: GPU dump buffer virtual address.
+ */
+static void kbasep_hwcnt_backend_gpu_dump_free(
+	struct kbase_context *kctx,
+	u64 gpu_dump_va)
+{
+	WARN_ON(!kctx);
+	if (gpu_dump_va)
+		kbase_mem_free(kctx, gpu_dump_va);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend.
+ * @backend: Pointer to GPU backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_gpu_destroy(
+	struct kbase_hwcnt_backend_gpu *backend)
+{
+	if (!backend)
+		return;
+
+	if (backend->kctx) {
+		struct kbase_context *kctx = backend->kctx;
+		struct kbase_device *kbdev = kctx->kbdev;
+
+		if (backend->cpu_dump_va)
+			kbase_phy_alloc_mapping_put(kctx, backend->vmap);
+
+		if (backend->gpu_dump_va)
+			kbasep_hwcnt_backend_gpu_dump_free(
+				kctx, backend->gpu_dump_va);
+
+		kbasep_js_release_privileged_ctx(kbdev, kctx);
+		kbase_destroy_context(kctx);
+	}
+
+	kfree(backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend.
+ * @info:        Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_create(
+	const struct kbase_hwcnt_backend_gpu_info *info,
+	struct kbase_hwcnt_backend_gpu **out_backend)
+{
+	int errcode;
+	struct kbase_device *kbdev;
+	struct kbase_hwcnt_backend_gpu *backend = NULL;
+
+	WARN_ON(!info);
+	WARN_ON(!out_backend);
+
+	kbdev = info->kbdev;
+
+	backend = kzalloc(sizeof(*backend), GFP_KERNEL);
+	if (!backend)
+		goto alloc_error;
+
+	backend->info = info;
+
+	backend->kctx = kbase_create_context(kbdev, true,
+		BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
+	if (!backend->kctx)
+		goto alloc_error;
+
+	kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
+
+	errcode = kbasep_hwcnt_backend_gpu_dump_alloc(
+		info, backend->kctx, &backend->gpu_dump_va);
+	if (errcode)
+		goto error;
+
+	backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx,
+		backend->gpu_dump_va, &backend->vmap);
+	if (!backend->cpu_dump_va)
+		goto alloc_error;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* The dummy model needs the CPU mapping. */
+	gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va);
+#endif
+
+	*out_backend = backend;
+	return 0;
+
+alloc_error:
+	errcode = -ENOMEM;
+error:
+	kbasep_hwcnt_backend_gpu_destroy(backend);
+	return errcode;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_gpu_init(
+	const struct kbase_hwcnt_backend_info *info,
+	struct kbase_hwcnt_backend **out_backend)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_gpu *backend = NULL;
+
+	if (!info || !out_backend)
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_gpu_create(
+		(const struct kbase_hwcnt_backend_gpu_info *) info, &backend);
+	if (errcode)
+		return errcode;
+
+	*out_backend = (struct kbase_hwcnt_backend *)backend;
+
+	return 0;
+}
+
+/* GPU backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend)
+{
+	if (!backend)
+		return;
+
+	kbasep_hwcnt_backend_gpu_dump_disable(backend);
+	kbasep_hwcnt_backend_gpu_destroy(
+		(struct kbase_hwcnt_backend_gpu *)backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_gpu_info_destroy(
+	const struct kbase_hwcnt_backend_gpu_info *info)
+{
+	if (!info)
+		return;
+
+	kbase_hwcnt_gpu_metadata_destroy(info->metadata);
+	kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info.
+ * @kbdev:    Non-NULL pointer to kbase device.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_info_create(
+	struct kbase_device *kbdev,
+	const struct kbase_hwcnt_backend_gpu_info **out_info)
+{
+	int errcode = -ENOMEM;
+	struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
+	struct kbase_hwcnt_backend_gpu_info *info = NULL;
+
+	WARN_ON(!kbdev);
+	WARN_ON(!out_info);
+
+	errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info);
+	if (errcode)
+		return errcode;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		goto error;
+
+	info->kbdev = kbdev;
+
+#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
+	info->use_secondary = true;
+#else
+	info->use_secondary = false;
+#endif
+
+	errcode = kbase_hwcnt_gpu_metadata_create(
+		&hwcnt_gpu_info, info->use_secondary,
+		&info->metadata,
+		&info->dump_bytes);
+	if (errcode)
+		goto error;
+
+	*out_info = info;
+
+	return 0;
+error:
+	kbasep_hwcnt_backend_gpu_info_destroy(info);
+	return errcode;
+}
+
+int kbase_hwcnt_backend_gpu_create(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	int errcode;
+	const struct kbase_hwcnt_backend_gpu_info *info = NULL;
+
+	if (!kbdev || !iface)
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info);
+
+	if (errcode)
+		return errcode;
+
+	iface->metadata = info->metadata;
+	iface->info = (struct kbase_hwcnt_backend_info *)info;
+	iface->init = kbasep_hwcnt_backend_gpu_init;
+	iface->term = kbasep_hwcnt_backend_gpu_term;
+	iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns;
+	iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable;
+	iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock;
+	iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable;
+	iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear;
+	iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request;
+	iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait;
+	iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get;
+
+	return 0;
+}
+
+void kbase_hwcnt_backend_gpu_destroy(
+	struct kbase_hwcnt_backend_interface *iface)
+{
+	if (!iface)
+		return;
+
+	kbasep_hwcnt_backend_gpu_info_destroy(
+		(const struct kbase_hwcnt_backend_gpu_info *)iface->info);
+	memset(iface, 0, sizeof(*iface));
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h
new file mode 100644
index 0000000..7712f14
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_backend_gpu.h
@@ -0,0 +1,61 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_GPU_H_
+#define _KBASE_HWCNT_BACKEND_GPU_H_
+
+#include "mali_kbase_hwcnt_backend.h"
+
+struct kbase_device;
+
+/**
+ * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend
+ *                                    interface.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @iface: Non-NULL pointer to backend interface structure that is filled in
+ *             on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_gpu_create(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend
+ *                                     interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_gpu_destroy(
+	struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */
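A minimal sketch of pairing this backend with a counter context. Only functions declared in this patch are used; in real code the interface structure would live somewhere that outlives the context rather than on the stack.

#include "mali_kbase_hwcnt_backend_gpu.h"
#include "mali_kbase_hwcnt_context.h"

static int hwcnt_example_setup(struct kbase_device *kbdev)
{
	struct kbase_hwcnt_backend_interface iface;
	struct kbase_hwcnt_context *hctx;
	int err;

	err = kbase_hwcnt_backend_gpu_create(kbdev, &iface);
	if (err)
		return err;

	err = kbase_hwcnt_context_init(&iface, &hctx);
	if (err) {
		kbase_hwcnt_backend_gpu_destroy(&iface);
		return err;
	}

	/* ... acquire an accumulator and perform dumps here ... */

	/* Tear down in reverse order; the context keeps a pointer to the
	 * interface, so the interface is destroyed last.
	 */
	kbase_hwcnt_context_term(hctx);
	kbase_hwcnt_backend_gpu_destroy(&iface);
	return 0;
}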
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h
new file mode 100644
index 0000000..bc50ad1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_context.h
@@ -0,0 +1,119 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter context API.
+ */
+
+#ifndef _KBASE_HWCNT_CONTEXT_H_
+#define _KBASE_HWCNT_CONTEXT_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_backend_interface;
+struct kbase_hwcnt_context;
+
+/**
+ * kbase_hwcnt_context_init() - Initialise a hardware counter context.
+ * @iface:    Non-NULL pointer to a hardware counter backend interface.
+ * @out_hctx: Non-NULL pointer to where the pointer to the created context will
+ *            be stored on success.
+ *
+ * On creation, the disable count of the context will be 0.
+ * A hardware counter accumulator can be acquired using a created context.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_context_init(
+	const struct kbase_hwcnt_backend_interface *iface,
+	struct kbase_hwcnt_context **out_hctx);
+
+/**
+ * kbase_hwcnt_context_term() - Terminate a hardware counter context.
+ * @hctx: Pointer to context to be terminated.
+ */
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by
+ *                                  the context, so related counter data
+ *                                  structures can be created.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(
+	struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * If a call to this function increments the disable count from 0 to 1, and
+ * an accumulator has been acquired, then a counter dump will be performed
+ * before counters are disabled via the backend interface.
+ *
+ * Subsequent dumps via the accumulator while counters are disabled will first
+ * return the accumulated dump, then will return dumps with zeroed counters.
+ *
+ * After this function call returns, it is guaranteed that counters will not be
+ * enabled via the backend interface.
+ */
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the
+ *                                        context if possible in an atomic
+ *                                        context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * This function will only succeed if hardware counters are effectively already
+ * disabled, i.e. there is no accumulator, the disable count is already
+ * non-zero, or the accumulator has no counters set.
+ *
+ * After this function call returns true, it is guaranteed that counters will
+ * not be enabled via the backend interface.
+ *
+ * Return: true if the disable count was incremented, else false.
+ */
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_enable() - Decrement the disable count of the context.
+ * @hctx: Pointer to the hardware counter context.
+ *
+ * If a call to this function decrements the disable count from 1 to 0, and
+ * an accumulator has been acquired, then counters will be re-enabled via the
+ * backend interface.
+ *
+ * If an accumulator has been acquired and enabling counters fails for some
+ * reason, the accumulator will be placed into an error state.
+ *
+ * It is only valid to call this function one time for each prior returned call
+ * to kbase_hwcnt_context_disable.
+ *
+ * The spinlock documented in the backend interface that was passed in to
+ * kbase_hwcnt_context_init() must be held before calling this function.
+ */
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
+
+#endif /* _KBASE_HWCNT_CONTEXT_H_ */
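Disables nest, so each successful disable (plain or atomic) is expected to be balanced by exactly one enable. A short sketch, assuming the backend in use is the GPU backend from this patch, whose documented spinlock is kbdev->hwaccess_lock:

#include <linux/spinlock.h>
#include "mali_kbase_hwcnt_context.h"

static void hwcnt_example_quiesce(struct kbase_hwcnt_context *hctx,
				  spinlock_t *backend_lock)
{
	unsigned long flags;

	/* Counters are guaranteed off once this returns; if an accumulator is
	 * acquired, counts taken so far are folded into the accumulation
	 * buffer and returned by the next accumulator dump.
	 */
	kbase_hwcnt_context_disable(hctx);

	/* ... work that must not race with counter dumping ... */

	/* Re-enable with the backend's documented spinlock held; for the GPU
	 * backend in this patch that is kbdev->hwaccess_lock.
	 */
	spin_lock_irqsave(backend_lock, flags);
	kbase_hwcnt_context_enable(hctx);
	spin_unlock_irqrestore(backend_lock, flags);
}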
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c
new file mode 100644
index 0000000..8581fe9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.c
@@ -0,0 +1,777 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#define KBASE_HWCNT_V4_BLOCKS_PER_GROUP 8
+#define KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP 4
+#define KBASE_HWCNT_V4_MAX_GROUPS \
+	(KBASE_HWCNT_AVAIL_MASK_BITS / KBASE_HWCNT_V4_BLOCKS_PER_GROUP)
+#define KBASE_HWCNT_V4_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V4_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V4_VALUES_PER_BLOCK \
+	(KBASE_HWCNT_V4_HEADERS_PER_BLOCK + KBASE_HWCNT_V4_COUNTERS_PER_BLOCK)
+/* Index of the PRFCNT_EN header into a V4 counter block */
+#define KBASE_HWCNT_V4_PRFCNT_EN_HEADER 2
+
+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
+#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V5_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V5_VALUES_PER_BLOCK \
+	(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_COUNTERS_PER_BLOCK)
+/* Index of the PRFCNT_EN header into a V5 counter block */
+#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_v4_create() - Create hardware counter
+ *                                                 metadata for a v4 GPU.
+ * @v4_info:  Non-NULL pointer to hwcnt info for a v4 GPU.
+ * @metadata: Non-NULL pointer to where created metadata is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_v4_create(
+	const struct kbase_hwcnt_gpu_v4_info *v4_info,
+	const struct kbase_hwcnt_metadata **metadata)
+{
+	size_t grp;
+	int errcode = -ENOMEM;
+	struct kbase_hwcnt_description desc;
+	struct kbase_hwcnt_group_description *grps;
+	size_t avail_mask_bit;
+
+	WARN_ON(!v4_info);
+	WARN_ON(!metadata);
+
+	/* Check if there are enough bits in the availability mask to represent
+	 * all the hardware counter blocks in the system.
+	 */
+	if (v4_info->cg_count > KBASE_HWCNT_V4_MAX_GROUPS)
+		return -EINVAL;
+
+	grps = kcalloc(v4_info->cg_count, sizeof(*grps), GFP_KERNEL);
+	if (!grps)
+		goto clean_up;
+
+	desc.grp_cnt = v4_info->cg_count;
+	desc.grps = grps;
+
+	for (grp = 0; grp < v4_info->cg_count; grp++) {
+		size_t blk;
+		size_t sc;
+		const u64 core_mask = v4_info->cgs[grp].core_mask;
+		struct kbase_hwcnt_block_description *blks = kcalloc(
+			KBASE_HWCNT_V4_BLOCKS_PER_GROUP,
+			sizeof(*blks),
+			GFP_KERNEL);
+
+		if (!blks)
+			goto clean_up;
+
+		grps[grp].type = KBASE_HWCNT_GPU_GROUP_TYPE_V4;
+		grps[grp].blk_cnt = KBASE_HWCNT_V4_BLOCKS_PER_GROUP;
+		grps[grp].blks = blks;
+
+		for (blk = 0; blk < KBASE_HWCNT_V4_BLOCKS_PER_GROUP; blk++) {
+			blks[blk].inst_cnt = 1;
+			blks[blk].hdr_cnt =
+				KBASE_HWCNT_V4_HEADERS_PER_BLOCK;
+			blks[blk].ctr_cnt =
+				KBASE_HWCNT_V4_COUNTERS_PER_BLOCK;
+		}
+
+		for (sc = 0; sc < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP; sc++) {
+			blks[sc].type = core_mask & (1ull << sc) ?
+				KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER :
+				KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+		}
+
+		blks[4].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER;
+		blks[5].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2;
+		blks[6].type = KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+		blks[7].type = (grp == 0) ?
+			KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM :
+			KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED;
+
+		WARN_ON(KBASE_HWCNT_V4_BLOCKS_PER_GROUP != 8);
+	}
+
+	/* Initialise the availability mask */
+	desc.avail_mask = 0;
+	avail_mask_bit = 0;
+
+	for (grp = 0; grp < desc.grp_cnt; grp++) {
+		size_t blk;
+		const struct kbase_hwcnt_block_description *blks =
+			desc.grps[grp].blks;
+		for (blk = 0; blk < desc.grps[grp].blk_cnt; blk++) {
+			WARN_ON(blks[blk].inst_cnt != 1);
+			if (blks[blk].type !=
+			    KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED)
+				desc.avail_mask |= (1ull << avail_mask_bit);
+
+			avail_mask_bit++;
+		}
+	}
+
+	errcode = kbase_hwcnt_metadata_create(&desc, metadata);
+
+	/* Always clean up, as metadata will make a copy of the input args */
+clean_up:
+	if (grps) {
+		for (grp = 0; grp < v4_info->cg_count; grp++)
+			kfree(grps[grp].blks);
+		kfree(grps);
+	}
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_v4_dump_bytes() - Get the raw dump buffer size for a
+ *                                            V4 GPU.
+ * @v4_info: Non-NULL pointer to hwcnt info for a v4 GPU.
+ *
+ * Return: Size of buffer the V4 GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_gpu_v4_dump_bytes(
+	const struct kbase_hwcnt_gpu_v4_info *v4_info)
+{
+	return v4_info->cg_count *
+		KBASE_HWCNT_V4_BLOCKS_PER_GROUP *
+		KBASE_HWCNT_V4_VALUES_PER_BLOCK *
+		KBASE_HWCNT_VALUE_BYTES;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter
+ *                                                 metadata for a v5 GPU.
+ * @v5_info:       Non-NULL pointer to hwcnt info for a v5 GPU.
+ * @use_secondary: True if secondary performance counters should be used, else
+ *                 false. Ignored if secondary counters are not supported.
+ * @metadata:      Non-NULL pointer to where created metadata is stored
+ *                 on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
+	const struct kbase_hwcnt_gpu_v5_info *v5_info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **metadata)
+{
+	struct kbase_hwcnt_description desc;
+	struct kbase_hwcnt_group_description group;
+	struct kbase_hwcnt_block_description
+		blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+	size_t non_sc_block_count;
+	size_t sc_block_count;
+
+	WARN_ON(!v5_info);
+	WARN_ON(!metadata);
+
+	/* Calculate number of block instances that aren't shader cores */
+	non_sc_block_count = 2 + v5_info->l2_count;
+	/* Calculate number of block instances that are shader cores */
+	sc_block_count = fls64(v5_info->core_mask);
+
+	/*
+	 * A system can have up to 64 shader cores, but the 64-bit
+	 * availability mask can't physically represent that many cores as well
+	 * as the other hardware blocks.
+	 * Error out if there are more blocks than our implementation can
+	 * support.
+	 */
+	if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
+		return -EINVAL;
+
+	/* One Job Manager block */
+	blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM;
+	blks[0].inst_cnt = 1;
+	blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	/* One Tiler block */
+	blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+	blks[1].inst_cnt = 1;
+	blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	/* l2_count memsys blks */
+	blks[2].type = use_secondary ?
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 :
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+	blks[2].inst_cnt = v5_info->l2_count;
+	blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	/*
+	 * There are as many shader cores in the system as there are bits set in
+	 * the core mask. However, the dump buffer memory requirements need to
+	 * take into account the fact that the core mask may be non-contiguous.
+	 *
+	 * For example, a system with a core mask of 0b1011 has the same dump
+	 * buffer memory requirements as a system with 0b1111, but requires more
+	 * memory than a system with 0b0111. However, core 2 of the system with
+	 * 0b1011 doesn't physically exist, and the dump buffer memory that
+	 * accounts for that core will never be written to when we do a counter
+	 * dump.
+	 *
+	 * We find the core mask's last set bit to determine the memory
+	 * requirements, and embed the core mask into the availability mask so
+	 * we can determine later which shader cores physically exist.
+	 */
+	blks[3].type = use_secondary ?
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 :
+		KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+	blks[3].inst_cnt = sc_block_count;
+	blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+	blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
+
+	WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
+
+	group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+	group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
+	group.blks = blks;
+
+	desc.grp_cnt = 1;
+	desc.grps = &group;
+
+	/* The JM, Tiler, and L2s are always available, and are before cores */
+	desc.avail_mask = (1ull << non_sc_block_count) - 1;
+	/* Embed the core mask directly in the availability mask */
+	desc.avail_mask |= (v5_info->core_mask << non_sc_block_count);
+
+	return kbase_hwcnt_metadata_create(&desc, metadata);
+}
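+
+/*
+ * Worked example of the availability mask built above (illustrative numbers
+ * only): with l2_count = 2 and core_mask = 0b1011, non_sc_block_count is 4,
+ * so avail_mask = 0b1111 | (0b1011 << 4) = 0xbf. Bits 0-3 mark the JM, Tiler
+ * and two Memsys blocks as present, and bits 4-7 carry the sparse core mask.
+ */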
+
+/**
+ * kbasep_hwcnt_backend_gpu_v5_dump_bytes() - Get the raw dump buffer size for a
+ *                                            V5 GPU.
+ * @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU.
+ *
+ * Return: Size of buffer the V5 GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_gpu_v5_dump_bytes(
+	const struct kbase_hwcnt_gpu_v5_info *v5_info)
+{
+	WARN_ON(!v5_info);
+	return (2 + v5_info->l2_count + fls64(v5_info->core_mask)) *
+		KBASE_HWCNT_V5_VALUES_PER_BLOCK *
+		KBASE_HWCNT_VALUE_BYTES;
+}
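+
+/*
+ * Worked size example (illustrative numbers only): with l2_count = 2 and
+ * core_mask = 0b1011, fls64(core_mask) = 4, giving (2 + 2 + 4) blocks of
+ * 64 values of 4 bytes each, i.e. a 2048 byte raw dump buffer.
+ */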
+
+int kbase_hwcnt_gpu_info_init(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_gpu_info *info)
+{
+	if (!kbdev || !info)
+		return -EINVAL;
+
+#ifdef CONFIG_MALI_NO_MALI
+	/* NO_MALI uses V5 layout, regardless of the underlying platform. */
+	info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+	info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+	info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
+	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+		info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V4;
+		info->v4.cg_count = kbdev->gpu_props.num_core_groups;
+		info->v4.cgs = kbdev->gpu_props.props.coherency_info.group;
+	} else {
+		const struct base_gpu_props *props = &kbdev->gpu_props.props;
+		const size_t l2_count = props->l2_props.num_l2_slices;
+		const size_t core_mask =
+			props->coherency_info.group[0].core_mask;
+
+		info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+		info->v5.l2_count = l2_count;
+		info->v5.core_mask = core_mask;
+	}
+#endif
+	return 0;
+}
+
+int kbase_hwcnt_gpu_metadata_create(
+	const struct kbase_hwcnt_gpu_info *info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **out_metadata,
+	size_t *out_dump_bytes)
+{
+	int errcode;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+
+	if (!info || !out_metadata || !out_dump_bytes)
+		return -EINVAL;
+
+	switch (info->type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+		dump_bytes = kbasep_hwcnt_backend_gpu_v4_dump_bytes(&info->v4);
+		errcode = kbasep_hwcnt_backend_gpu_metadata_v4_create(
+			&info->v4, &metadata);
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5);
+		errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create(
+			&info->v5, use_secondary, &metadata);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (errcode)
+		return errcode;
+
+	/*
+	 * The dump buffer abstraction must have exactly the same size and
+	 * layout as the physical dump buffer, for backwards compatibility.
+	 */
+	WARN_ON(dump_bytes != metadata->dump_buf_bytes);
+
+	*out_metadata = metadata;
+	*out_dump_bytes = dump_bytes;
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_create);
+
+void kbase_hwcnt_gpu_metadata_destroy(
+	const struct kbase_hwcnt_metadata *metadata)
+{
+	if (!metadata)
+		return;
+
+	kbase_hwcnt_metadata_destroy(metadata);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_metadata_destroy);
+
+static bool is_block_type_shader(
+	const u64 grp_type,
+	const u64 blk_type,
+	const size_t blk)
+{
+	bool is_shader = false;
+
+	switch (grp_type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+		/* A blk index in [0, KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP-1]
+		 * corresponds to a shader core, or to a reserved slot where
+		 * that core is not implemented. Use the blk index to
+		 * distinguish the reserved case.
+		 */
+		if (blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER ||
+		    (blk < KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP &&
+		     blk_type == KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED))
+			is_shader = true;
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
+		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2)
+			is_shader = true;
+		break;
+	default:
+		/* Warn on unknown group type */
+		WARN_ON(true);
+	}
+
+	return is_shader;
+}
+
+int kbase_hwcnt_gpu_dump_get(
+	struct kbase_hwcnt_dump_buffer *dst,
+	void *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map,
+	u64 pm_core_mask,
+	bool accumulate)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	const u32 *dump_src;
+	size_t src_offset, grp, blk, blk_inst;
+	size_t grp_prev = 0;
+	u64 core_mask = pm_core_mask;
+
+	if (!dst || !src || !dst_enable_map ||
+	    (dst_enable_map->metadata != dst->metadata))
+		return -EINVAL;
+
+	metadata = dst->metadata;
+	dump_src = (const u32 *)src;
+	src_offset = 0;
+
+	kbase_hwcnt_metadata_for_each_block(
+		metadata, grp, blk, blk_inst) {
+		const size_t hdr_cnt =
+			kbase_hwcnt_metadata_block_headers_count(
+				metadata, grp, blk);
+		const size_t ctr_cnt =
+			kbase_hwcnt_metadata_block_counters_count(
+				metadata, grp, blk);
+		const u64 blk_type = kbase_hwcnt_metadata_block_type(
+			metadata, grp, blk);
+		const bool is_shader_core = is_block_type_shader(
+			kbase_hwcnt_metadata_group_type(metadata, grp),
+			blk_type, blk);
+
+		if (grp != grp_prev) {
+			/* A group change can only happen with V4. V5 and
+			 * later are expected to be single group only. Drop
+			 * the lower group's cores from the mask by shifting
+			 * right by KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP.
+			 */
+			core_mask = pm_core_mask >>
+				KBASE_HWCNT_V4_SC_BLOCKS_PER_GROUP;
+			grp_prev = grp;
+		}
+
+		/* Only copy or accumulate the block if at least one of its
+		 * values is enabled in the destination.
+		 */
+		if (kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst)) {
+			u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+				dst, grp, blk, blk_inst);
+			const u32 *src_blk = dump_src + src_offset;
+
+			if (!is_shader_core || (core_mask & 1)) {
+				if (accumulate) {
+					kbase_hwcnt_dump_buffer_block_accumulate(
+						dst_blk, src_blk, hdr_cnt,
+						ctr_cnt);
+				} else {
+					kbase_hwcnt_dump_buffer_block_copy(
+						dst_blk, src_blk,
+						(hdr_cnt + ctr_cnt));
+				}
+			} else if (!accumulate) {
+				kbase_hwcnt_dump_buffer_block_zero(
+					dst_blk, (hdr_cnt + ctr_cnt));
+			}
+		}
+
+		src_offset += (hdr_cnt + ctr_cnt);
+		if (is_shader_core)
+			core_mask = core_mask >> 1;
+	}
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_dump_get);
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
+ *                                                    enable map abstraction to
+ *                                                    a physical block enable
+ *                                                    map.
+ * @lo: Low 64 bits of block enable map abstraction.
+ * @hi: High 64 bits of block enable map abstraction.
+ *
+ * The abstraction uses 128 bits to enable 128 block values, whereas the
+ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
+ * Therefore, this conversion is lossy.
+ *
+ * Return: 32-bit physical block enable map.
+ */
+static inline u32 kbasep_hwcnt_backend_gpu_block_map_to_physical(
+	u64 lo,
+	u64 hi)
+{
+	u32 phys = 0;
+	u64 dwords[2] = {lo, hi};
+	size_t dword_idx;
+
+	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+		const u64 dword = dwords[dword_idx];
+		u16 packed = 0;
+
+		size_t hword_bit;
+
+		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+			const size_t dword_bit = hword_bit * 4;
+			const u16 mask =
+				((dword >> (dword_bit + 0)) & 0x1) |
+				((dword >> (dword_bit + 1)) & 0x1) |
+				((dword >> (dword_bit + 2)) & 0x1) |
+				((dword >> (dword_bit + 3)) & 0x1);
+			packed |= (mask << hword_bit);
+		}
+		phys |= ((u32)packed) << (16 * dword_idx);
+	}
+	return phys;
+}
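+
+/*
+ * Worked example (illustrative values only): an abstraction low word of 0x13
+ * has values 0, 1 and 4 enabled, so physical bits 0 and 1 are set and the
+ * function returns 0x3. Converting 0x3 back with the function below yields a
+ * low word of 0xff (values 0-7 enabled), which shows the lossiness.
+ */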
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
+ *                                                      block enable map to a
+ *                                                      block enable map
+ *                                                      abstraction.
+ * @phys: Physical 32-bit block enable map
+ * @lo:   Non-NULL pointer to where low 64 bits of block enable map abstraction
+ *        will be stored.
+ * @hi:   Non-NULL pointer to where high 64 bits of block enable map abstraction
+ *        will be stored.
+ */
+static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(
+	u32 phys,
+	u64 *lo,
+	u64 *hi)
+{
+	u64 dwords[2] = {0, 0};
+
+	size_t dword_idx;
+
+	for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+		const u16 packed = phys >> (16 * dword_idx);
+		u64 dword = 0;
+
+		size_t hword_bit;
+
+		for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+			const size_t dword_bit = hword_bit * 4;
+			const u64 mask = (packed >> (hword_bit)) & 0x1;
+
+			dword |= mask << (dword_bit + 0);
+			dword |= mask << (dword_bit + 1);
+			dword |= mask << (dword_bit + 2);
+			dword |= mask << (dword_bit + 3);
+		}
+		dwords[dword_idx] = dword;
+	}
+	*lo = dwords[0];
+	*hi = dwords[1];
+}
+
+void kbase_hwcnt_gpu_enable_map_to_physical(
+	struct kbase_hwcnt_physical_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+
+	u64 jm_bm = 0;
+	u64 shader_bm = 0;
+	u64 tiler_bm = 0;
+	u64 mmu_l2_bm = 0;
+
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!src) || WARN_ON(!dst))
+		return;
+
+	metadata = src->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(
+		metadata, grp, blk, blk_inst) {
+		const u64 grp_type = kbase_hwcnt_metadata_group_type(
+			metadata, grp);
+		const u64 blk_type = kbase_hwcnt_metadata_block_type(
+			metadata, grp, blk);
+		const size_t blk_val_cnt =
+			kbase_hwcnt_metadata_block_values_count(
+				metadata, grp, blk);
+		const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+			src, grp, blk, blk_inst);
+
+		switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:
+				shader_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:
+				tiler_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:
+				mmu_l2_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
+				jm_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+				jm_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+				tiler_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+				shader_bm |= *blk_map;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+				mmu_l2_bm |= *blk_map;
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		default:
+			WARN_ON(true);
+		}
+	}
+
+	dst->jm_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0);
+	dst->shader_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0);
+	dst->tiler_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0);
+	dst->mmu_l2_bm =
+		kbasep_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical);
+
+void kbase_hwcnt_gpu_enable_map_from_physical(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_physical_enable_map *src)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+
+	u64 ignored_hi;
+	u64 jm_bm;
+	u64 shader_bm;
+	u64 tiler_bm;
+	u64 mmu_l2_bm;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!src) || WARN_ON(!dst))
+		return;
+
+	metadata = dst->metadata;
+
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->jm_bm, &jm_bm, &ignored_hi);
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->shader_bm, &shader_bm, &ignored_hi);
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->tiler_bm, &tiler_bm, &ignored_hi);
+	kbasep_hwcnt_backend_gpu_block_map_from_physical(
+		src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi);
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		const u64 grp_type = kbase_hwcnt_metadata_group_type(
+			metadata, grp);
+		const u64 blk_type = kbase_hwcnt_metadata_block_type(
+			metadata, grp, blk);
+		const size_t blk_val_cnt =
+			kbase_hwcnt_metadata_block_values_count(
+				metadata, grp, blk);
+		u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+			dst, grp, blk, blk_inst);
+
+		switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V4_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v4_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:
+				*blk_map = shader_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:
+				*blk_map = tiler_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:
+				*blk_map = mmu_l2_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:
+				*blk_map = jm_bm;
+				break;
+			case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED:
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+			WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
+			switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+				*blk_map = jm_bm;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+				*blk_map = tiler_bm;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+				*blk_map = shader_bm;
+				break;
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+			case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+				*blk_map = mmu_l2_bm;
+				break;
+			default:
+				WARN_ON(true);
+			}
+			break;
+		default:
+			WARN_ON(true);
+		}
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_from_physical);
+
+void kbase_hwcnt_gpu_patch_dump_headers(
+	struct kbase_hwcnt_dump_buffer *buf,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!buf) || WARN_ON(!enable_map) ||
+	    WARN_ON(buf->metadata != enable_map->metadata))
+		return;
+
+	metadata = buf->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		const u64 grp_type =
+			kbase_hwcnt_metadata_group_type(metadata, grp);
+		u32 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(
+			buf, grp, blk, blk_inst);
+		const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
+			enable_map, grp, blk, blk_inst);
+		const u32 prfcnt_en =
+			kbasep_hwcnt_backend_gpu_block_map_to_physical(
+				blk_map[0], 0);
+
+		switch ((enum kbase_hwcnt_gpu_group_type)grp_type) {
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
+			buf_blk[KBASE_HWCNT_V4_PRFCNT_EN_HEADER] = prfcnt_en;
+			break;
+		case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+			buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
+			break;
+		default:
+			WARN_ON(true);
+		}
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_patch_dump_headers);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h
new file mode 100644
index 0000000..12891e0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_gpu.h
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_H_
+#define _KBASE_HWCNT_GPU_H_
+
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
+ *                                   identify metadata groups.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V4: GPU V4 group type.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
+ */
+enum kbase_hwcnt_gpu_group_type {
+	KBASE_HWCNT_GPU_GROUP_TYPE_V4 = 0x10,
+	KBASE_HWCNT_GPU_GROUP_TYPE_V5,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v4_block_type - GPU V4 hardware counter block types,
+ *                                      used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER:   Shader block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER:    Tiler block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2:   MMU/L2 block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM:       Job Manager block.
+ * @KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: Reserved block.
+ */
+enum kbase_hwcnt_gpu_v4_block_type {
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_SHADER = 0x20,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_TILER,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_MMU_L2,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM,
+	KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
+ *                                      used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:      Job Manager block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:   Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:      Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:     Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:  Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ */
+enum kbase_hwcnt_gpu_v5_block_type {
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
+	KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
+};
+
+/**
+ * struct kbase_hwcnt_physical_enable_map - Representation of enable map
+ *                                          directly used by GPU.
+ * @jm_bm:     Job Manager counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm:  Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ */
+struct kbase_hwcnt_physical_enable_map {
+	u32 jm_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+};
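+
+/*
+ * Granularity note with an illustrative value: bit n of each bitmask enables
+ * physical counters [n*4, n*4+3] in every instance of that block type, so
+ * e.g. shader_bm = 0x1 enables counters 0-3 on all shader cores at once.
+ */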
+
+/**
+ * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs.
+ * @cg_count: Core group count.
+ * @cgs:      Non-NULL pointer to array of cg_count coherent group structures.
+ *
+ * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups,
+ * where each core group may have a physically different layout.
+ */
+struct kbase_hwcnt_gpu_v4_info {
+	size_t cg_count;
+	const struct mali_base_gpu_coherent_group *cgs;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs.
+ * @l2_count:   L2 cache count.
+ * @core_mask:  Shader core mask. May be sparse.
+ */
+struct kbase_hwcnt_gpu_v5_info {
+	size_t l2_count;
+	u64 core_mask;
+};
+
+/**
+ * struct kbase_hwcnt_gpu_info - Tagged union with information about the current
+ *                               GPU's hwcnt blocks.
+ * @type: GPU type.
+ * @v4:   Info filled in if a v4 GPU.
+ * @v5:   Info filled in if a v5 GPU.
+ */
+struct kbase_hwcnt_gpu_info {
+	enum kbase_hwcnt_gpu_group_type type;
+	union {
+		struct kbase_hwcnt_gpu_v4_info v4;
+		struct kbase_hwcnt_gpu_v5_info v5;
+	};
+};
+
+/**
+ * kbase_hwcnt_gpu_info_init() - Initialise an info structure used to create the
+ *                               hwcnt metadata.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @info:  Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_info_init(
+	struct kbase_device *kbdev,
+	struct kbase_hwcnt_gpu_info *info);
+
+/**
+ * kbase_hwcnt_gpu_metadata_create() - Create hardware counter metadata for the
+ *                                     current GPU.
+ * @info:           Non-NULL pointer to info struct initialised by
+ *                  kbase_hwcnt_gpu_info_init.
+ * @use_secondary:  True if secondary performance counters should be used, else
+ *                  false. Ignored if secondary counters are not supported.
+ * @out_metadata:   Non-NULL pointer to where created metadata is stored on
+ *                  success.
+ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
+ *                  buffer is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_metadata_create(
+	const struct kbase_hwcnt_gpu_info *info,
+	bool use_secondary,
+	const struct kbase_hwcnt_metadata **out_metadata,
+	size_t *out_dump_bytes);
+
+/**
+ * kbase_hwcnt_gpu_metadata_destroy() - Destroy GPU hardware counter metadata.
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_gpu_metadata_destroy(
+	const struct kbase_hwcnt_metadata *metadata);
+
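+/*
+ * Illustrative call sequence (a sketch, not verbatim driver code):
+ *
+ *   struct kbase_hwcnt_gpu_info info;
+ *   const struct kbase_hwcnt_metadata *md;
+ *   size_t dump_bytes;
+ *
+ *   if (!kbase_hwcnt_gpu_info_init(kbdev, &info) &&
+ *       !kbase_hwcnt_gpu_metadata_create(&info, false, &md, &dump_bytes)) {
+ *           ...create enable maps and dump buffers from md...
+ *           kbase_hwcnt_gpu_metadata_destroy(md);
+ *   }
+ */
+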
+/**
+ * kbase_hwcnt_gpu_dump_get() - Copy or accumulate enabled counters from the raw
+ *                              dump buffer in src into the dump buffer
+ *                              abstraction in dst.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src raw dump buffer, of same length
+ *                  as returned in out_dump_bytes parameter of
+ *                  kbase_hwcnt_gpu_metadata_create.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @pm_core_mask:   PM state synchronized shaders core mask with the dump.
+ * @accumulate:     True if counters in src should be accumulated into dst,
+ *                  rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_gpu_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_dump_get(
+	struct kbase_hwcnt_dump_buffer *dst,
+	void *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map,
+	const u64 pm_core_mask,
+	bool accumulate);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
+ *                                            into a physical enable map.
+ * @dst: Non-NULL pointer to dst physical enable map.
+ * @src: Non-NULL pointer to src enable map abstraction.
+ *
+ * The src must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the enable map abstraction has one bit per
+ * individual counter block value, but the physical enable map uses 1 bit for
+ * every 4 counters, shared over all instances of a block.
+ */
+void kbase_hwcnt_gpu_enable_map_to_physical(
+	struct kbase_hwcnt_physical_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
+ *                                              an enable map abstraction.
+ * @dst: Non-NULL pointer to dst enable map abstraction.
+ * @src: Non-NULL pointer to src physical enable map.
+ *
+ * The dst must have been created from a metadata returned from a call to
+ * kbase_hwcnt_gpu_metadata_create.
+ *
+ * This is a lossy conversion, as the physical enable map can technically
+ * support counter blocks with 128 counters each, but no hardware actually uses
+ * more than 64, so the enable map abstraction has nowhere to store the enable
+ * information for the 64 non-existent counters.
+ */
+void kbase_hwcnt_gpu_enable_map_from_physical(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_physical_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
+ *                                        enable headers in a dump buffer to
+ *                                        reflect the specified enable map.
+ * @buf:        Non-NULL pointer to dump buffer to patch.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * The buf and enable_map must have been created from a metadata returned from
+ * a call to kbase_hwcnt_gpu_metadata_create.
+ *
+ * This function should be used before handing off a dump buffer over the
+ * kernel-user boundary, to ensure the header is accurate for the enable map
+ * used by the user.
+ */
+void kbase_hwcnt_gpu_patch_dump_headers(
+	struct kbase_hwcnt_dump_buffer *buf,
+	const struct kbase_hwcnt_enable_map *enable_map);
+
+#endif /* _KBASE_HWCNT_GPU_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c
new file mode 100644
index 0000000..b0e6aee
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.c
@@ -0,0 +1,152 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_legacy.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_ioctl.h"
+
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+/**
+ * struct kbase_hwcnt_legacy_client - Legacy hardware counter client.
+ * @user_dump_buf: Non-NULL pointer to a user buffer, where dumps are returned.
+ * @enable_map:    Counter enable map.
+ * @dump_buf:      Dump buffer used to manipulate dumps before they are copied
+ *                 to user space.
+ * @hvcli:         Hardware counter virtualizer client.
+ */
+struct kbase_hwcnt_legacy_client {
+	void __user *user_dump_buf;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer dump_buf;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+};
+
+int kbase_hwcnt_legacy_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_ioctl_hwcnt_enable *enable,
+	struct kbase_hwcnt_legacy_client **out_hlcli)
+{
+	int errcode;
+	struct kbase_hwcnt_legacy_client *hlcli;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct kbase_hwcnt_physical_enable_map phys_em;
+
+	if (!hvirt || !enable || !enable->dump_buffer || !out_hlcli)
+		return -EINVAL;
+
+	metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
+
+	hlcli = kzalloc(sizeof(*hlcli), GFP_KERNEL);
+	if (!hlcli)
+		return -ENOMEM;
+
+	hlcli->user_dump_buf = (void __user *)(uintptr_t)enable->dump_buffer;
+
+	errcode = kbase_hwcnt_enable_map_alloc(metadata, &hlcli->enable_map);
+	if (errcode)
+		goto error;
+
+	/* Translate from the ioctl enable map to the internal one */
+	phys_em.jm_bm = enable->jm_bm;
+	phys_em.shader_bm = enable->shader_bm;
+	phys_em.tiler_bm = enable->tiler_bm;
+	phys_em.mmu_l2_bm = enable->mmu_l2_bm;
+	kbase_hwcnt_gpu_enable_map_from_physical(&hlcli->enable_map, &phys_em);
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hlcli->dump_buf);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_virtualizer_client_create(
+		hvirt, &hlcli->enable_map, &hlcli->hvcli);
+	if (errcode)
+		goto error;
+
+	*out_hlcli = hlcli;
+	return 0;
+
+error:
+	kbase_hwcnt_legacy_client_destroy(hlcli);
+	return errcode;
+}
+
+void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli)
+{
+	if (!hlcli)
+		return;
+
+	kbase_hwcnt_virtualizer_client_destroy(hlcli->hvcli);
+	kbase_hwcnt_dump_buffer_free(&hlcli->dump_buf);
+	kbase_hwcnt_enable_map_free(&hlcli->enable_map);
+	kfree(hlcli);
+}
+
+int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli)
+{
+	int errcode;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	if (!hlcli)
+		return -EINVAL;
+
+	/* Dump into the kernel buffer */
+	errcode = kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
+		&ts_start_ns, &ts_end_ns, &hlcli->dump_buf);
+	if (errcode)
+		return errcode;
+
+	/* Patch the dump buf headers, to hide the counters that other hwcnt
+	 * clients are using.
+	 */
+	kbase_hwcnt_gpu_patch_dump_headers(
+		&hlcli->dump_buf, &hlcli->enable_map);
+
+	/* Zero all non-enabled counters (current values are undefined) */
+	kbase_hwcnt_dump_buffer_zero_non_enabled(
+		&hlcli->dump_buf, &hlcli->enable_map);
+
+	/* Copy into the user's buffer */
+	errcode = copy_to_user(hlcli->user_dump_buf, hlcli->dump_buf.dump_buf,
+		hlcli->dump_buf.metadata->dump_buf_bytes);
+	/* Non-zero errcode implies user buf was invalid or too small */
+	if (errcode)
+		return -EFAULT;
+
+	return 0;
+}
+
+int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli)
+{
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	if (!hlcli)
+		return -EINVAL;
+
+	/* Dump with a NULL buffer to clear this client's counters */
+	return kbase_hwcnt_virtualizer_client_dump(hlcli->hvcli,
+		&ts_start_ns, &ts_end_ns, NULL);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h
new file mode 100644
index 0000000..7a610ae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_legacy.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Legacy hardware counter interface, giving userspace clients simple,
+ * synchronous access to hardware counters.
+ *
+ * Any functions operating on a single legacy hardware counter client instance
+ * must be externally synchronised.
+ * Different clients may safely be used concurrently.
+ */
+
+#ifndef _KBASE_HWCNT_LEGACY_H_
+#define _KBASE_HWCNT_LEGACY_H_
+
+struct kbase_hwcnt_legacy_client;
+struct kbase_ioctl_hwcnt_enable;
+struct kbase_hwcnt_virtualizer;
+
+/**
+ * kbase_hwcnt_legacy_client_create() - Create a legacy hardware counter client.
+ * @hvirt:     Non-NULL pointer to hardware counter virtualizer the client
+ *             should be attached to.
+ * @enable:    Non-NULL pointer to hwcnt_enable structure, containing a valid
+ *             pointer to a user dump buffer large enough to hold a dump, and
+ *             the counters that should be enabled.
+ * @out_hlcli: Non-NULL pointer to where the pointer to the created client will
+ *             be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_ioctl_hwcnt_enable *enable,
+	struct kbase_hwcnt_legacy_client **out_hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_destroy() - Destroy a legacy hardware counter
+ *                                       client.
+ * @hlcli: Pointer to the legacy hardware counter client.
+ *
+ * Will safely destroy a client in any partial state of construction.
+ */
+void kbase_hwcnt_legacy_client_destroy(struct kbase_hwcnt_legacy_client *hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_dump() - Perform a hardware counter dump into the
+ *                                    client's user buffer.
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client.
+ *
+ * This function will synchronously dump hardware counters into the user buffer
+ * specified on client creation, using the counter selection that was also
+ * specified at creation time.
+ *
+ * The counters are automatically cleared after each dump, such that the next
+ * dump performed will return the counter values accumulated between the time of
+ * this function call and the next dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_dump(struct kbase_hwcnt_legacy_client *hlcli);
+
+/**
+ * kbase_hwcnt_legacy_client_clear() - Perform and discard a hardware counter
+ *                                     dump.
+ * @hlcli: Non-NULL pointer to the legacy hardware counter client.
+ *
+ * This function will synchronously clear the hardware counters, such that the
+ * next dump performed will return the counter values accumulated between the
+ * time of this function call and the next dump.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_legacy_client_clear(struct kbase_hwcnt_legacy_client *hlcli);
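+
+/*
+ * Illustrative client lifetime (a sketch, not verbatim driver code; "hvirt"
+ * and "enable" are assumed to come from the ioctl handling path):
+ *
+ *   struct kbase_hwcnt_legacy_client *hlcli;
+ *
+ *   if (!kbase_hwcnt_legacy_client_create(hvirt, enable, &hlcli)) {
+ *           kbase_hwcnt_legacy_client_dump(hlcli);
+ *           kbase_hwcnt_legacy_client_destroy(hlcli);
+ *   }
+ */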
+
+#endif /* _KBASE_HWCNT_LEGACY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c
new file mode 100644
index 0000000..1e9efde
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.c
@@ -0,0 +1,538 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+
+/* Minimum alignment of each block of hardware counters */
+#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT \
+	(KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
+
+/**
+ * KBASE_HWCNT_ALIGN_UPWARDS() - Align a value to an alignment.
+ * @value:     The value to align upwards.
+ * @alignment: The alignment.
+ *
+ * Return: A number greater than or equal to value that is aligned to alignment.
+ */
+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
+	((value) + (((alignment) - ((value) % (alignment))) % (alignment)))
+
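+/*
+ * Worked example (illustrative values only): with a 64 value alignment,
+ * KBASE_HWCNT_ALIGN_UPWARDS(68, 64) evaluates to 128, while an already
+ * aligned value such as 64 is left unchanged.
+ */
+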
+int kbase_hwcnt_metadata_create(
+	const struct kbase_hwcnt_description *desc,
+	const struct kbase_hwcnt_metadata **out_metadata)
+{
+	char *buf;
+	struct kbase_hwcnt_metadata *metadata;
+	struct kbase_hwcnt_group_metadata *grp_mds;
+	size_t grp;
+	size_t enable_map_count; /* Number of u64 bitfields (inc padding) */
+	size_t dump_buf_count; /* Number of u32 values (inc padding) */
+	size_t avail_mask_bits; /* Number of availability mask bits */
+
+	size_t size;
+	size_t offset;
+
+	if (!desc || !out_metadata)
+		return -EINVAL;
+
+	/* Calculate the bytes needed to tightly pack the metadata */
+
+	/* Top level metadata */
+	size = 0;
+	size += sizeof(struct kbase_hwcnt_metadata);
+
+	/* Group metadata */
+	size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+	/* Block metadata */
+	for (grp = 0; grp < desc->grp_cnt; grp++) {
+		size += sizeof(struct kbase_hwcnt_block_metadata) *
+			desc->grps[grp].blk_cnt;
+	}
+
+	/* Single allocation for the entire metadata */
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Use the allocated memory for the metadata and its members */
+
+	/* Bump allocate the top level metadata */
+	offset = 0;
+	metadata = (struct kbase_hwcnt_metadata *)(buf + offset);
+	offset += sizeof(struct kbase_hwcnt_metadata);
+
+	/* Bump allocate the group metadata */
+	grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset);
+	offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+	enable_map_count = 0;
+	dump_buf_count = 0;
+	avail_mask_bits = 0;
+
+	for (grp = 0; grp < desc->grp_cnt; grp++) {
+		size_t blk;
+
+		const struct kbase_hwcnt_group_description *grp_desc =
+			desc->grps + grp;
+		struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
+
+		size_t group_enable_map_count = 0;
+		size_t group_dump_buffer_count = 0;
+		size_t group_avail_mask_bits = 0;
+
+		/* Bump allocate this group's block metadata */
+		struct kbase_hwcnt_block_metadata *blk_mds =
+			(struct kbase_hwcnt_block_metadata *)(buf + offset);
+		offset += sizeof(struct kbase_hwcnt_block_metadata) *
+			grp_desc->blk_cnt;
+
+		/* Fill in each block in the group's information */
+		for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
+			const struct kbase_hwcnt_block_description *blk_desc =
+				grp_desc->blks + blk;
+			struct kbase_hwcnt_block_metadata *blk_md =
+				blk_mds + blk;
+			const size_t n_values =
+				blk_desc->hdr_cnt + blk_desc->ctr_cnt;
+
+			blk_md->type = blk_desc->type;
+			blk_md->inst_cnt = blk_desc->inst_cnt;
+			blk_md->hdr_cnt = blk_desc->hdr_cnt;
+			blk_md->ctr_cnt = blk_desc->ctr_cnt;
+			blk_md->enable_map_index = group_enable_map_count;
+			blk_md->enable_map_stride =
+				kbase_hwcnt_bitfield_count(n_values);
+			blk_md->dump_buf_index = group_dump_buffer_count;
+			blk_md->dump_buf_stride =
+				KBASE_HWCNT_ALIGN_UPWARDS(
+					n_values,
+					(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+					 KBASE_HWCNT_VALUE_BYTES));
+			blk_md->avail_mask_index = group_avail_mask_bits;
+
+			group_enable_map_count +=
+				blk_md->enable_map_stride * blk_md->inst_cnt;
+			group_dump_buffer_count +=
+				blk_md->dump_buf_stride * blk_md->inst_cnt;
+			group_avail_mask_bits += blk_md->inst_cnt;
+		}
+
+		/* Fill in the group's information */
+		grp_md->type = grp_desc->type;
+		grp_md->blk_cnt = grp_desc->blk_cnt;
+		grp_md->blk_metadata = blk_mds;
+		grp_md->enable_map_index = enable_map_count;
+		grp_md->dump_buf_index = dump_buf_count;
+		grp_md->avail_mask_index = avail_mask_bits;
+
+		enable_map_count += group_enable_map_count;
+		dump_buf_count += group_dump_buffer_count;
+		avail_mask_bits += group_avail_mask_bits;
+	}
+
+	/* Fill in the top level metadata's information */
+	metadata->grp_cnt = desc->grp_cnt;
+	metadata->grp_metadata = grp_mds;
+	metadata->enable_map_bytes =
+		enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
+	metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
+	metadata->avail_mask = desc->avail_mask;
+
+	WARN_ON(size != offset);
+	/* Due to the block alignment, there should be exactly one enable map
+	 * bit per 4 bytes in the dump buffer.
+	 */
+	WARN_ON(metadata->dump_buf_bytes !=
+		(metadata->enable_map_bytes *
+		 BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+
+	*out_metadata = metadata;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_create);
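+
+/*
+ * Worked size example (illustrative numbers only): a V5 style description
+ * with one group of 12 block instances (1 JM + 1 Tiler + 2 Memsys + 8 shader
+ * cores), each of 64 values, gives an enable map of 12 u64 bitfields
+ * (96 bytes) and a dump buffer of 12 * 64 * 4 = 3072 bytes, matching the
+ * WARN_ON ratio check above.
+ */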
+
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+	kfree(metadata);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_metadata_destroy);
+
+int kbase_hwcnt_enable_map_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_enable_map *enable_map)
+{
+	u64 *enable_map_buf;
+
+	if (!metadata || !enable_map)
+		return -EINVAL;
+
+	enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+	if (!enable_map_buf)
+		return -ENOMEM;
+
+	enable_map->metadata = metadata;
+	enable_map->enable_map = enable_map_buf;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc);
+
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
+{
+	if (!enable_map)
+		return;
+
+	kfree(enable_map->enable_map);
+	enable_map->enable_map = NULL;
+	enable_map->metadata = NULL;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free);
+
+int kbase_hwcnt_dump_buffer_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	u32 *buf;
+
+	if (!metadata || !dump_buf)
+		return -EINVAL;
+
+	buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	dump_buf->metadata = metadata;
+	dump_buf->dump_buf = buf;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc);
+
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	if (!dump_buf)
+		return;
+
+	kfree(dump_buf->dump_buf);
+	memset(dump_buf, 0, sizeof(*dump_buf));
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_free);
+
+int kbase_hwcnt_dump_buffer_array_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t n,
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+	struct kbase_hwcnt_dump_buffer *buffers;
+	size_t buf_idx;
+	unsigned int order;
+	unsigned long addr;
+
+	if (!metadata || !dump_bufs)
+		return -EINVAL;
+
+	/* Allocate memory for the dump buffer struct array */
+	buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers)
+		return -ENOMEM;
+
+	/* Allocate pages for the actual dump buffers, as they tend to be fairly
+	 * large.
+	 */
+	order = get_order(metadata->dump_buf_bytes * n);
+	addr = __get_free_pages(GFP_KERNEL, order);
+
+	if (!addr) {
+		kfree(buffers);
+		return -ENOMEM;
+	}
+
+	dump_bufs->page_addr = addr;
+	dump_bufs->page_order = order;
+	dump_bufs->buf_cnt = n;
+	dump_bufs->bufs = buffers;
+
+	/* Set the buffer of each dump buf */
+	for (buf_idx = 0; buf_idx < n; buf_idx++) {
+		const size_t offset = metadata->dump_buf_bytes * buf_idx;
+
+		buffers[buf_idx].metadata = metadata;
+		buffers[buf_idx].dump_buf = (u32 *)(addr + offset);
+	}
+
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_alloc);
+
+void kbase_hwcnt_dump_buffer_array_free(
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+	if (!dump_bufs)
+		return;
+
+	kfree(dump_bufs->bufs);
+	free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+	memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_array_free);
+
+void kbase_hwcnt_dump_buffer_zero(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk;
+		size_t val_cnt;
+
+		if (!kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst))
+			continue;
+
+		dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+
+		kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero);
+
+void kbase_hwcnt_dump_buffer_zero_strict(
+	struct kbase_hwcnt_dump_buffer *dst)
+{
+	if (WARN_ON(!dst))
+		return;
+
+	memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict);
+
+void kbase_hwcnt_dump_buffer_zero_non_enabled(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+			dst_enable_map, grp, blk, blk_inst);
+		size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+
+		/* Align upwards to include padding bytes */
+		val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
+			(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+			 KBASE_HWCNT_VALUE_BYTES));
+
+		if (kbase_hwcnt_metadata_block_instance_avail(
+			metadata, grp, blk, blk_inst)) {
+			/* Block available, so only zero non-enabled values */
+			kbase_hwcnt_dump_buffer_block_zero_non_enabled(
+				dst_blk, blk_em, val_cnt);
+		} else {
+			/* Block not available, so zero the entire thing */
+			kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+		}
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_non_enabled);
+
+void kbase_hwcnt_dump_buffer_copy(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk;
+		const u32 *src_blk;
+		size_t val_cnt;
+
+		if (!kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst))
+			continue;
+
+		dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+
+		kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy);
+
+void kbase_hwcnt_dump_buffer_copy_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+			dst_enable_map, grp, blk, blk_inst);
+		size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+			metadata, grp, blk);
+		/* Align upwards to include padding bytes */
+		val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(val_cnt,
+			(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+			 KBASE_HWCNT_VALUE_BYTES));
+
+		kbase_hwcnt_dump_buffer_block_copy_strict(
+			dst_blk, src_blk, blk_em, val_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict);
+
+void kbase_hwcnt_dump_buffer_accumulate(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk;
+		const u32 *src_blk;
+		size_t hdr_cnt;
+		size_t ctr_cnt;
+
+		if (!kbase_hwcnt_enable_map_block_enabled(
+			dst_enable_map, grp, blk, blk_inst))
+			continue;
+
+		dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+			metadata, grp, blk);
+		ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
+			metadata, grp, blk);
+
+		kbase_hwcnt_dump_buffer_block_accumulate(
+			dst_blk, src_blk, hdr_cnt, ctr_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate);
+
+void kbase_hwcnt_dump_buffer_accumulate_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+
+	if (WARN_ON(!dst) ||
+	    WARN_ON(!src) ||
+	    WARN_ON(!dst_enable_map) ||
+	    WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
+		u32 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(
+			dst, grp, blk, blk_inst);
+		const u32 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+			src, grp, blk, blk_inst);
+		const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+			dst_enable_map, grp, blk, blk_inst);
+		size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+			metadata, grp, blk);
+		size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(
+			metadata, grp, blk);
+		/* Align upwards to include padding bytes */
+		ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(hdr_cnt + ctr_cnt,
+			(KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+			 KBASE_HWCNT_VALUE_BYTES)) - hdr_cnt;
+
+		kbase_hwcnt_dump_buffer_block_accumulate_strict(
+			dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h
new file mode 100644
index 0000000..4d78c84
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_types.h
@@ -0,0 +1,1087 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter types.
+ * Contains structures for describing the physical layout of hardware counter
+ * dump buffers and enable maps within a system.
+ *
+ * Also contains helper functions for manipulation of these dump buffers and
+ * enable maps.
+ *
+ * Through use of these structures and functions, hardware counters can be
+ * enabled, copied, accumulated, and generally manipulated in a generic way,
+ * regardless of the physical counter dump layout.
+ *
+ * Terminology:
+ *
+ * Hardware Counter System:
+ *   A collection of hardware counter groups, making a full hardware counter
+ *   system.
+ * Hardware Counter Group:
+ *   A group of Hardware Counter Blocks (e.g. a t62x might have more than one
+ *   core group, so has one counter group per core group, where each group
+ *   may have a different number and layout of counter blocks).
+ * Hardware Counter Block:
+ *   A block of hardware counters (e.g. shader block, tiler block).
+ * Hardware Counter Block Instance:
+ *   An instance of a Hardware Counter Block (e.g. an MP4 GPU might have
+ *   4 shader block instances).
+ *
+ * Block Header:
+ *   A header value inside a counter block. Headers don't count anything,
+ *   so it is only valid to copy or zero them. Headers are always the first
+ *   values in the block.
+ * Block Counter:
+ *   A counter value inside a counter block. Counters can be zeroed, copied,
+ *   or accumulated. Counters are always immediately after the headers in the
+ *   block.
+ * Block Value:
+ *   A catch-all term for block headers and block counters.
+ *
+ * Enable Map:
+ *   An array of u64 bitfields, where each bit either enables exactly one
+ *   block value, or is unused (padding).
+ * Dump Buffer:
+ *   An array of u32 values, where each u32 corresponds either to one block
+ *   value, or is unused (padding).
+ * Availability Mask:
+ *   A bitfield, where each bit corresponds to whether a block instance is
+ *   physically available (e.g. an MP3 GPU may have a sparse core mask of
+ *   0b1011, meaning it only has 3 cores but for hardware counter dumps has the
+ *   same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this
+ *   case, the availability mask might be 0b1011111 (the exact layout will
+ *   depend on the specific hardware architecture), with the 3 extra early bits
+ *   corresponding to other block instances in the hardware counter system).
+ * Metadata:
+ *   Structure describing the physical layout of the enable map and dump buffers
+ *   for a specific hardware counter system.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_TYPES_H_
+#define _KBASE_HWCNT_TYPES_H_
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include "mali_malisw.h"
+
+/* Number of bytes in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64))
+
+/* Number of bits in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE)
+
+/* Number of bytes for each counter value */
+#define KBASE_HWCNT_VALUE_BYTES (sizeof(u32))
+
+/* Number of bits in an availability mask (i.e. max total number of block
+ * instances supported in a Hardware Counter System)
+ */
+#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE)
+
+/**
+ * struct kbase_hwcnt_block_description - Description of one or more identical,
+ *                                        contiguous, Hardware Counter Blocks.
+ * @type:     The arbitrary identifier used to identify the type of the block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt:  The number of 32-bit Block Headers in the block.
+ * @ctr_cnt:  The number of 32-bit Block Counters in the block.
+ */
+struct kbase_hwcnt_block_description {
+	u64 type;
+	size_t inst_cnt;
+	size_t hdr_cnt;
+	size_t ctr_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_group_description - Description of one or more identical,
+ *                                        contiguous Hardware Counter Groups.
+ * @type:    The arbitrary identifier used to identify the type of the group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the group.
+ * @blks:    Non-NULL pointer to an array of blk_cnt block descriptions,
+ *           describing each type of Hardware Counter Block in the group.
+ */
+struct kbase_hwcnt_group_description {
+	u64 type;
+	size_t blk_cnt;
+	const struct kbase_hwcnt_block_description *blks;
+};
+
+/**
+ * struct kbase_hwcnt_description - Description of a Hardware Counter System.
+ * @grp_cnt:    The number of Hardware Counter Groups.
+ * @grps:       Non-NULL pointer to an array of grp_cnt group descriptions,
+ *              describing each Hardware Counter Group in the system.
+ * @avail_mask: Flat Availability Mask for all block instances in the system.
+ */
+struct kbase_hwcnt_description {
+	size_t grp_cnt;
+	const struct kbase_hwcnt_group_description *grps;
+	u64 avail_mask;
+};
+
+/**
+ * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout
+ *                                     of a block in a Hardware Counter System's
+ *                                     Dump Buffers and Enable Maps.
+ * @type:              The arbitrary identifier used to identify the type of the
+ *                     block.
+ * @inst_cnt:          The number of Instances of the block.
+ * @hdr_cnt:           The number of 32-bit Block Headers in the block.
+ * @ctr_cnt:           The number of 32-bit Block Counters in the block.
+ * @enable_map_index:  Index in u64s into the parent's Enable Map where the
+ *                     Enable Map bitfields of the Block Instances described by
+ *                     this metadata start.
+ * @enable_map_stride: Stride in u64s between the Enable Maps of each of the
+ *                     Block Instances described by this metadata.
+ * @dump_buf_index:    Index in u32s into the parent's Dump Buffer where the
+ *                     Dump Buffers of the Block Instances described by this
+ *                     metadata start.
+ * @dump_buf_stride:   Stride in u32s between the Dump Buffers of each of the
+ *                     Block Instances described by this metadata.
+ * @avail_mask_index:  Index in bits into the parent's Availability Mask where
+ *                     the Availability Masks of the Block Instances described
+ *                     by this metadata start.
+ */
+struct kbase_hwcnt_block_metadata {
+	u64 type;
+	size_t inst_cnt;
+	size_t hdr_cnt;
+	size_t ctr_cnt;
+	size_t enable_map_index;
+	size_t enable_map_stride;
+	size_t dump_buf_index;
+	size_t dump_buf_stride;
+	size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
+ *                                     of a group of blocks in a Hardware
+ *                                     Counter System's Dump Buffers and Enable
+ *                                     Maps.
+ * @type:             The arbitrary identifier used to identify the type of the
+ *                    group.
+ * @blk_cnt:          The number of types of Hardware Counter Block in the
+ *                    group.
+ * @blk_metadata:     Non-NULL pointer to an array of blk_cnt block metadata,
+ *                    describing the physical layout of each type of Hardware
+ *                    Counter Block in the group.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ *                    Enable Maps of the blocks within the group described by
+ *                    this metadata start.
+ * @dump_buf_index:   Index in u32s into the parent's Dump Buffer where the
+ *                    Dump Buffers of the blocks within the group described by
+ *                    this metadata start.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ *                    the Availability Masks of the blocks within the group
+ *                    described by this metadata start.
+ */
+struct kbase_hwcnt_group_metadata {
+	u64 type;
+	size_t blk_cnt;
+	const struct kbase_hwcnt_block_metadata *blk_metadata;
+	size_t enable_map_index;
+	size_t dump_buf_index;
+	size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_metadata - Metadata describing the physical layout
+ *                               of Dump Buffers and Enable Maps within a
+ *                               Hardware Counter System.
+ * @grp_cnt:          The number of Hardware Counter Groups.
+ * @grp_metadata:     Non-NULL pointer to an array of grp_cnt group metadata,
+ *                    describing the physical layout of each Hardware Counter
+ *                    Group in the system.
+ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
+ * @dump_buf_bytes:   The size in bytes of a Dump Buffer needed for the system.
+ * @avail_mask:       The Availability Mask for the system.
+ */
+struct kbase_hwcnt_metadata {
+	size_t grp_cnt;
+	const struct kbase_hwcnt_group_metadata *grp_metadata;
+	size_t enable_map_bytes;
+	size_t dump_buf_bytes;
+	u64 avail_mask;
+};
+
+/**
+ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64
+ *                                 bitfields.
+ * @metadata:   Non-NULL pointer to metadata used to identify, and to describe
+ *              the layout of the enable map.
+ * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array
+ *              of u64 bitfields, each bit of which enables one hardware
+ *              counter.
+ */
+struct kbase_hwcnt_enable_map {
+	const struct kbase_hwcnt_metadata *metadata;
+	u64 *enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer. Array of u32
+ *                                  values.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ *            the layout of the Dump Buffer.
+ * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array
+ *            of u32 values.
+ */
+struct kbase_hwcnt_dump_buffer {
+	const struct kbase_hwcnt_metadata *metadata;
+	u32 *dump_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
+ * @page_addr:  Address of allocated pages. A single allocation is used for all
+ *              Dump Buffers in the array.
+ * @page_order: The allocation order of the pages.
+ * @buf_cnt:    The number of allocated Dump Buffers.
+ * @bufs:       Non-NULL pointer to the array of Dump Buffers.
+ */
+struct kbase_hwcnt_dump_buffer_array {
+	unsigned long page_addr;
+	unsigned int page_order;
+	size_t buf_cnt;
+	struct kbase_hwcnt_dump_buffer *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object
+ *                                 from a description.
+ * @desc:     Non-NULL pointer to a hardware counter description.
+ * @metadata: Non-NULL pointer to where created metadata will be stored on
+ *            success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_metadata_create(
+	const struct kbase_hwcnt_description *desc,
+	const struct kbase_hwcnt_metadata **metadata);
+
+/**
+ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object.
+ * @metadata: Pointer to hardware counter metadata
+ */
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
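+
+/*
+ * Example (illustrative sketch only, not part of the driver): describing a
+ * small, invented counter system and creating metadata for it with the API
+ * above. The types, counts and availability mask are made-up values; the
+ * avail_mask of 0x1f marks all five block instances (1 + 4) as available.
+ *
+ *   static const struct kbase_hwcnt_block_description example_blks[] = {
+ *       { .type = 0, .inst_cnt = 1, .hdr_cnt = 4, .ctr_cnt = 60 },
+ *       { .type = 1, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60 },
+ *   };
+ *   static const struct kbase_hwcnt_group_description example_grp = {
+ *       .type = 0,
+ *       .blk_cnt = ARRAY_SIZE(example_blks),
+ *       .blks = example_blks,
+ *   };
+ *   static const struct kbase_hwcnt_description example_desc = {
+ *       .grp_cnt = 1,
+ *       .grps = &example_grp,
+ *       .avail_mask = 0x1f,
+ *   };
+ *   const struct kbase_hwcnt_metadata *md;
+ *   int errcode = kbase_hwcnt_metadata_create(&example_desc, &md);
+ *
+ *   if (!errcode) {
+ *       ...use md to allocate enable maps and dump buffers...
+ *       kbase_hwcnt_metadata_destroy(md);
+ *   }
+ */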
+
+/**
+ * kbase_hwcnt_metadata_group_count() - Get the number of groups.
+ * @metadata: Non-NULL pointer to metadata.
+ *
+ * Return: Number of hardware counter groups described by metadata.
+ */
+#define kbase_hwcnt_metadata_group_count(metadata) \
+	((metadata)->grp_cnt)
+
+/**
+ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ *
+ * Return: Type of the group grp.
+ */
+#define kbase_hwcnt_metadata_group_type(metadata, grp) \
+	((metadata)->grp_metadata[(grp)].type)
+
+/**
+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+#define kbase_hwcnt_metadata_block_count(metadata, grp) \
+	((metadata)->grp_metadata[(grp)].blk_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Type of the block blk in group grp.
+ */
+#define kbase_hwcnt_metadata_block_type(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].type)
+
+/**
+ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
+ *                                               a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+#define kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].inst_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter
+ *                                              headers.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of u32 counter headers in each instance of block blk in
+ *         group grp.
+ */
+#define kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].hdr_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of u32 counters in each instance of block blk in group
+ *         grp.
+ */
+#define kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) \
+	((metadata)->grp_metadata[(grp)].blk_metadata[(blk)].ctr_cnt)
+
+/**
+ * kbase_hwcnt_metadata_block_values_count() - Get the number of values.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of u32 headers plus counters in each instance of block blk
+ *         in group grp.
+ */
+#define kbase_hwcnt_metadata_block_values_count(metadata, grp, blk) \
+	(kbase_hwcnt_metadata_block_counters_count((metadata), (grp), (blk)) \
+	+ kbase_hwcnt_metadata_block_headers_count((metadata), (grp), (blk)))
+
+/**
+ * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in
+ *                                         the metadata.
+ * @md:       Non-NULL pointer to metadata.
+ * @grp:      size_t variable used as group iterator.
+ * @blk:      size_t variable used as block iterator.
+ * @blk_inst: size_t variable used as block instance iterator.
+ *
+ * Iteration order is group, then block, then block instance (i.e. linearly
+ * through memory).
+ */
+#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
+	for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
+		for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
+			for ((blk_inst) = 0; (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); (blk_inst)++)
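+
+/*
+ * Example (illustrative only): typical use of the iteration macro above, as
+ * in the helpers in mali_kbase_hwcnt_types.c. The macro expands to three
+ * nested for loops, so the body should be a single statement or a braced
+ * block:
+ *
+ *   size_t grp, blk, blk_inst;
+ *
+ *   kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) {
+ *       size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+ *           md, grp, blk);
+ *       ...
+ *   }
+ */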
+
+/**
+ * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail
+ *                                          mask corresponding to the block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: The bit index into the avail mask for the block.
+ */
+static inline size_t kbase_hwcnt_metadata_block_avail_bit(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t grp,
+	size_t blk)
+{
+	const size_t bit =
+		metadata->grp_metadata[grp].avail_mask_index +
+		metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
+
+	return bit;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is
+ *                                               available.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if the block instance is available, else false.
+ */
+static inline bool kbase_hwcnt_metadata_block_instance_avail(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	const size_t bit = kbase_hwcnt_metadata_block_avail_bit(
+		metadata, grp, blk) + blk_inst;
+	const u64 mask = 1ull << bit;
+
+	return (metadata->avail_mask & mask) != 0;
+}
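+
+/*
+ * Worked example (values invented for illustration): if a group's
+ * avail_mask_index is 3 and the block's avail_mask_index within that group is
+ * 1, then kbase_hwcnt_metadata_block_avail_bit() returns 3 + 1 = 4, and block
+ * instance 2 of that block maps to bit 4 + 2 = 6. The instance is therefore
+ * available if and only if bit 6 of metadata->avail_mask is set, i.e.
+ * (metadata->avail_mask & (1ull << 6)) != 0.
+ */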
+
+/**
+ * kbase_hwcnt_enable_map_alloc() - Allocate an enable map.
+ * @metadata:   Non-NULL pointer to metadata describing the system.
+ * @enable_map: Non-NULL pointer to enable map to be initialised. Will be
+ *              initialised to all zeroes (i.e. all counters disabled).
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_enable_map_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_free() - Free an enable map.
+ * @enable_map: Enable map to be freed.
+ *
+ * Can be safely called on an all-zeroed enable map structure, or on an already
+ * freed enable map.
+ */
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block
+ *                                           instance's enable map.
+ * @map:      Non-NULL pointer to (const) enable map.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: (const) u64* to the bitfield(s) used as the enable map for the
+ *         block instance.
+ */
+#define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \
+	((map)->enable_map + \
+	 (map)->metadata->grp_metadata[(grp)].enable_map_index + \
+	 (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \
+	 (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst))
+
+/**
+ * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required
+ *                                to have at minimum one bit per value.
+ * @val_cnt: Number of values.
+ *
+ * Return: Number of required bitfields.
+ */
+static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
+{
+	return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) /
+		KBASE_HWCNT_BITFIELD_BITS;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block.
+ * @dst:      Non-NULL pointer to enable map.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_all(
+	struct kbase_hwcnt_enable_map *dst,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+		dst->metadata, grp, blk);
+	const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+	u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+		dst, grp, blk, blk_inst);
+
+	memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES);
+}
+
+/**
+ * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map.
+ * @dst: Non-NULL pointer to enable map to zero.
+ */
+static inline void kbase_hwcnt_enable_map_disable_all(
+	struct kbase_hwcnt_enable_map *dst)
+{
+	memset(dst->enable_map, 0, dst->metadata->enable_map_bytes);
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block.
+ * @dst:      Non-NULL pointer to enable map.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_all(
+	struct kbase_hwcnt_enable_map *dst,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+		dst->metadata, grp, blk);
+	const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+	u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+		dst, grp, blk, blk_inst);
+
+	size_t bitfld_idx;
+
+	for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+		const u64 remaining_values = val_cnt -
+			(bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+		u64 block_enable_map_mask = U64_MAX;
+
+		if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+			block_enable_map_mask = (1ull << remaining_values) - 1;
+
+		block_enable_map[bitfld_idx] = block_enable_map_mask;
+	}
+}
+
+/**
+ * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable map.
+ * @dst: Non-NULL pointer to enable map.
+ */
+static inline void kbase_hwcnt_enable_map_enable_all(
+	struct kbase_hwcnt_enable_map *dst)
+{
+	size_t grp, blk, blk_inst;
+
+	kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
+		kbase_hwcnt_enable_map_block_enable_all(
+			dst, grp, blk, blk_inst);
+}
+
+/**
+ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_copy(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src)
+{
+	memcpy(dst->enable_map,
+	       src->enable_map,
+	       dst->metadata->enable_map_bytes);
+}
+
+/**
+ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_union(
+	struct kbase_hwcnt_enable_map *dst,
+	const struct kbase_hwcnt_enable_map *src)
+{
+	const size_t bitfld_count =
+		dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
+	size_t i;
+
+	for (i = 0; i < bitfld_count; i++)
+		dst->enable_map[i] |= src->enable_map[i];
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block
+ *                                          instance are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ * @grp:        Index of the group in the metadata.
+ * @blk:        Index of the block in the group.
+ * @blk_inst:   Index of the block instance in the block.
+ *
+ * Return: true if any values in the block are enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_enabled(
+	const struct kbase_hwcnt_enable_map *enable_map,
+	size_t grp,
+	size_t blk,
+	size_t blk_inst)
+{
+	bool any_enabled = false;
+	const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
+		enable_map->metadata, grp, blk);
+	const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+	const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
+		enable_map, grp, blk, blk_inst);
+
+	size_t bitfld_idx;
+
+	for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+		const u64 remaining_values = val_cnt -
+			(bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+		u64 block_enable_map_mask = U64_MAX;
+
+		if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+			block_enable_map_mask = (1ull << remaining_values) - 1;
+
+		any_enabled = any_enabled ||
+			(block_enable_map[bitfld_idx] & block_enable_map_mask);
+	}
+
+	return any_enabled;
+}
+
+/**
+ * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * Return: true if any values are enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_any_enabled(
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	size_t grp, blk, blk_inst;
+
+	kbase_hwcnt_metadata_for_each_block(
+		enable_map->metadata, grp, blk, blk_inst) {
+		if (kbase_hwcnt_enable_map_block_enabled(
+			enable_map, grp, blk, blk_inst))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block
+ *                                                instance is enabled.
+ * @bitfld:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to check in the block instance.
+ *
+ * Return: true if the value was enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_value_enabled(
+	const u64 *bitfld,
+	size_t val_idx)
+{
+	const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+	const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+	const u64 mask = 1ull << bit;
+
+	return (bitfld[idx] & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block
+ *                                               instance.
+ * @bitfld:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to enable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_value(
+	u64 *bitfld,
+	size_t val_idx)
+{
+	const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+	const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+	const u64 mask = 1ull << bit;
+
+	bitfld[idx] |= mask;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block
+ *                                                instance.
+ * @bitfld:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to disable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_value(
+	u64 *bitfld,
+	size_t val_idx)
+{
+	const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+	const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+	const u64 mask = 1ull << bit;
+
+	bitfld[idx] &= ~mask;
+}
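+
+/*
+ * Example (illustrative sketch only): combining the helpers above to enable
+ * the first counter (the value immediately after the headers) of block
+ * instance (grp 0, blk 0, inst 0), where em is assumed to be an enable map
+ * allocated with kbase_hwcnt_enable_map_alloc():
+ *
+ *   u64 *blk_em = kbase_hwcnt_enable_map_block_instance(&em, 0, 0, 0);
+ *   size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(
+ *       em.metadata, 0, 0);
+ *
+ *   kbase_hwcnt_enable_map_block_enable_value(blk_em, hdr_cnt);
+ *   WARN_ON(!kbase_hwcnt_enable_map_block_value_enabled(blk_em, hdr_cnt));
+ */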
+
+/**
+ * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be
+ *            initialised to undefined values, so must be used as a copy dest,
+ *            or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_free() - Free a dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer structure, or on an already
+ * freed dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers.
+ * @metadata:  Non-NULL pointer to metadata describing the system.
+ * @n:         Number of dump buffers to allocate
+ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. Each
+ *             dump buffer in the array will be initialised to undefined values,
+ *             so must be used as a copy dest, or cleared before use.
+ *
+ * A single contiguous page allocation will be used for all of the buffers
+ * inside the array, where:
+ * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_array_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	size_t n,
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs);
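+
+/*
+ * Example (illustrative sketch only; the buffer count of 8 is arbitrary):
+ * allocating and freeing a dump buffer array with the API above. Per the
+ * layout described above, arr.bufs[i].dump_buf points i * md->dump_buf_bytes
+ * bytes into the single allocation at arr.page_addr.
+ *
+ *   struct kbase_hwcnt_dump_buffer_array arr;
+ *   int errcode = kbase_hwcnt_dump_buffer_array_alloc(md, 8, &arr);
+ *
+ *   if (!errcode) {
+ *       size_t i;
+ *
+ *       for (i = 0; i < arr.buf_cnt; i++)
+ *           kbase_hwcnt_dump_buffer_zero_strict(&arr.bufs[i]);
+ *
+ *       kbase_hwcnt_dump_buffer_array_free(&arr);
+ *   }
+ */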
+
+/**
+ * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array.
+ * @dump_bufs: Dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer array structure, or on an
+ * already freed dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_array_free(
+	struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
+ *                                            instance's dump buffer.
+ * @buf:      Non-NULL pointer to (const) dump buffer.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: (const) u32* to the dump buffer for the block instance.
+ */
+#define kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst) \
+	((buf)->dump_buf + \
+	 (buf)->metadata->grp_metadata[(grp)].dump_buf_index + \
+	 (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_index + \
+	 (buf)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].dump_buf_stride * (blk_inst))
+
+/**
+ * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst.
+ *                                  After the operation, all non-enabled values
+ *                                  will be undefined.
+ * @dst:            Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero(
+	u32 *dst_blk,
+	size_t val_cnt)
+{
+	memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst.
+ *                                         After the operation, all values
+ *                                         (including padding bytes) will be
+ *                                         zero.
+ *                                         Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_zero_strict(
+	struct kbase_hwcnt_dump_buffer *dst);
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in
+ *                                              dst (including padding bytes and
+ *                                              unavailable blocks).
+ *                                              After the operation, all enabled
+ *                                              values will be unchanged.
+ * @dst:            Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero_non_enabled(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled
+ *                                                    values in a block.
+ *                                                    After the operation, all
+ *                                                    enabled values will be
+ *                                                    unchanged.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(
+	u32 *dst_blk,
+	const u64 *blk_em,
+	size_t val_cnt)
+{
+	size_t val;
+
+	for (val = 0; val < val_cnt; val++) {
+		if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val))
+			dst_blk[val] = 0;
+	}
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst.
+ *                                  After the operation, all non-enabled values
+ *                                  will be undefined.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	size_t val_cnt)
+{
+	/* Copy all the counters in the block instance.
+	 * Values of non-enabled counters are undefined.
+	 */
+	memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to
+ *                                         dst.
+ *                                         After the operation, all non-enabled
+ *                                         values (including padding bytes) will
+ *                                         be zero.
+ *                                         Slower than the non-strict variant.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values
+ *                                               from src to dst.
+ *                                               After the operation, all
+ *                                               non-enabled values will be
+ *                                               zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ *
+ * After the copy, any disabled values in dst will be zero.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	const u64 *blk_em,
+	size_t val_cnt)
+{
+	size_t val;
+
+	for (val = 0; val < val_cnt; val++) {
+		bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(
+			blk_em, val);
+
+		dst_blk[val] = val_enabled ? src_blk[val] : 0;
+	}
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and
+ *                                        accumulate all enabled counters from
+ *                                        src to dst.
+ *                                        After the operation, all non-enabled
+ *                                        values will be undefined.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and
+ *                                              accumulate all block counters
+ *                                              from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	size_t hdr_cnt,
+	size_t ctr_cnt)
+{
+	size_t ctr;
+	/* Copy all the headers in the block instance.
+	 * Values of non-enabled headers are undefined.
+	 */
+	memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES);
+
+	/* Accumulate all the counters in the block instance.
+	 * Values of non-enabled counters are undefined.
+	 */
+	for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+		u32 *dst_ctr = dst_blk + ctr;
+		const u32 *src_ctr = src_blk + ctr;
+
+		const u32 src_counter = *src_ctr;
+		const u32 dst_counter = *dst_ctr;
+
+		/* Saturating add */
+		u32 accumulated = src_counter + dst_counter;
+
+		if (accumulated < src_counter)
+			accumulated = U32_MAX;
+
+		*dst_ctr = accumulated;
+	}
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and
+ *                                               accumulate all enabled counters
+ *                                               from src to dst.
+ *                                               After the operation, all
+ *                                               non-enabled values (including
+ *                                               padding bytes) will be zero.
+ *                                               Slower than the non-strict
+ *                                               variant.
+ * @dst:            Non-NULL pointer to dst dump buffer.
+ * @src:            Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate_strict(
+	struct kbase_hwcnt_dump_buffer *dst,
+	const struct kbase_hwcnt_dump_buffer *src,
+	const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block
+ *                                                     headers and accumulate
+ *                                                     all block counters from
+ *                                                     src to dst.
+ *                                                     After the operation, all
+ *                                                     non-enabled values will
+ *                                                     be zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ *           kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em:  Non-NULL pointer to the block bitfield(s) obtained from a call to
+ *           kbase_hwcnt_enable_map_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
+	u32 *dst_blk,
+	const u32 *src_blk,
+	const u64 *blk_em,
+	size_t hdr_cnt,
+	size_t ctr_cnt)
+{
+	size_t ctr;
+
+	kbase_hwcnt_dump_buffer_block_copy_strict(
+		dst_blk, src_blk, blk_em, hdr_cnt);
+
+	for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+		bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(
+			blk_em, ctr);
+
+		u32 *dst_ctr = dst_blk + ctr;
+		const u32 *src_ctr = src_blk + ctr;
+
+		const u32 src_counter = *src_ctr;
+		const u32 dst_counter = *dst_ctr;
+
+		/* Saturating add */
+		u32 accumulated = src_counter + dst_counter;
+
+		if (accumulated < src_counter)
+			accumulated = U32_MAX;
+
+		*dst_ctr = ctr_enabled ? accumulated : 0;
+	}
+}
+
+#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c
new file mode 100644
index 0000000..917e47c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.c
@@ -0,0 +1,790 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_accumulator.h"
+#include "mali_kbase_hwcnt_context.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure.
+ * @hctx:              Hardware counter context being virtualized.
+ * @dump_threshold_ns: Minimum period between dumps requested by different
+ *                     clients; if a dump is requested within this period of
+ *                     the previous one, no new accumulator dump is performed
+ *                     and already accumulated values are used instead.
+ *                     If 0, rate limiting is disabled.
+ * @metadata:          Hardware counter metadata.
+ * @lock:              Lock acquired at all entrypoints, to protect mutable
+ *                     state.
+ * @client_count:      Current number of virtualizer clients.
+ * @clients:           List of virtualizer clients.
+ * @accum:             Hardware counter accumulator. NULL if no clients.
+ * @scratch_map:       Enable map used as scratch space during counter changes.
+ * @scratch_buf:       Dump buffer used as scratch space during dumps.
+ * @ts_last_dump_ns:   End time of most recent dump across all clients.
+ */
+struct kbase_hwcnt_virtualizer {
+	struct kbase_hwcnt_context *hctx;
+	u64 dump_threshold_ns;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct mutex lock;
+	size_t client_count;
+	struct list_head clients;
+	struct kbase_hwcnt_accumulator *accum;
+	struct kbase_hwcnt_enable_map scratch_map;
+	struct kbase_hwcnt_dump_buffer scratch_buf;
+	u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure.
+ * @node:        List node used for virtualizer client list.
+ * @hvirt:       Hardware counter virtualizer.
+ * @enable_map:  Enable map with client's current enabled counters.
+ * @accum_buf:   Dump buffer with client's current accumulated counters.
+ * @has_accum:   True if accum_buf contains any accumulated counters.
+ * @ts_start_ns: Counter collection start time of current dump.
+ */
+struct kbase_hwcnt_virtualizer_client {
+	struct list_head node;
+	struct kbase_hwcnt_virtualizer *hvirt;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer accum_buf;
+	bool has_accum;
+	u64 ts_start_ns;
+};
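+
+/*
+ * Example (illustrative sketch only, not driver code): the intended client
+ * flow. A client periodically calls
+ * kbase_hwcnt_virtualizer_client_set_counters() with its desired enable map
+ * and a dump buffer allocated against the virtualizer's metadata. The
+ * virtualizer unions the map with those of the other clients, performs a
+ * single accumulator dump, hands this client the counters gathered since its
+ * previous dump (including anything parked in its accum_buf by other clients'
+ * dumps), and folds the new dump into the other clients' accum_buf:
+ *
+ *   u64 ts_start_ns, ts_end_ns;
+ *   int errcode = kbase_hwcnt_virtualizer_client_set_counters(
+ *       hvcli, &enable_map, &ts_start_ns, &ts_end_ns, &dump_buf);
+ */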
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	if (!hvirt)
+		return NULL;
+
+	return hvirt->metadata;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_metadata);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory.
+ * @hvcli: Pointer to virtualizer client.
+ *
+ * Will safely free a client in any partial state of construction.
+ */
+static void kbasep_hwcnt_virtualizer_client_free(
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	if (!hvcli)
+		return;
+
+	kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf);
+	kbase_hwcnt_enable_map_free(&hvcli->enable_map);
+	kfree(hvcli);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer
+ *                                         client.
+ * @metadata:  Non-NULL pointer to counter metadata.
+ * @out_hvcli: Non-NULL pointer to where created client will be stored on
+ *             success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_alloc(
+	const struct kbase_hwcnt_metadata *metadata,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *hvcli = NULL;
+
+	WARN_ON(!metadata);
+	WARN_ON(!out_hvcli);
+
+	hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL);
+	if (!hvcli)
+		return -ENOMEM;
+
+	errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf);
+	if (errcode)
+		goto error;
+
+	*out_hvcli = hvcli;
+	return 0;
+error:
+	kbasep_hwcnt_virtualizer_client_free(hvcli);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a
+ *                                              client's accumulation buffer.
+ * @hvcli:    Non-NULL pointer to virtualizer client.
+ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from.
+ */
+static void kbasep_hwcnt_virtualizer_client_accumulate(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	WARN_ON(!hvcli);
+	WARN_ON(!dump_buf);
+	lockdep_assert_held(&hvcli->hvirt->lock);
+
+	if (hvcli->has_accum) {
+		/* If already some accumulation, accumulate */
+		kbase_hwcnt_dump_buffer_accumulate(
+			&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+	} else {
+		/* If no accumulation, copy */
+		kbase_hwcnt_dump_buffer_copy(
+			&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+	}
+	hvcli->has_accum = true;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter
+ *                                             accumulator after final client
+ *                                             removal.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Will safely terminate the accumulator in any partial state of initialisation.
+ */
+static void kbasep_hwcnt_virtualizer_accumulator_term(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	WARN_ON(!hvirt);
+	lockdep_assert_held(&hvirt->lock);
+	WARN_ON(hvirt->client_count);
+
+	kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf);
+	kbase_hwcnt_enable_map_free(&hvirt->scratch_map);
+	kbase_hwcnt_accumulator_release(hvirt->accum);
+	hvirt->accum = NULL;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter
+ *                                             accumulator before first client
+ *                                             addition.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_accumulator_init(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	int errcode;
+
+	WARN_ON(!hvirt);
+	lockdep_assert_held(&hvirt->lock);
+	WARN_ON(hvirt->client_count);
+	WARN_ON(hvirt->accum);
+
+	errcode = kbase_hwcnt_accumulator_acquire(
+		hvirt->hctx, &hvirt->accum);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_enable_map_alloc(
+		hvirt->metadata, &hvirt->scratch_map);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_dump_buffer_alloc(
+		hvirt->metadata, &hvirt->scratch_buf);
+	if (errcode)
+		goto error;
+
+	return 0;
+error:
+	kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the
+ *                                       virtualizer.
+ * @hvirt:      Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:      Non-NULL pointer to the virtualizer client to add.
+ * @enable_map: Non-NULL pointer to client's initial enable map.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_add(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	int errcode = 0;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!enable_map);
+	lockdep_assert_held(&hvirt->lock);
+
+	if (hvirt->client_count == 0)
+		/* First client added, so initialise the accumulator */
+		errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt);
+	if (errcode)
+		return errcode;
+
+	hvirt->client_count += 1;
+
+	if (hvirt->client_count == 1) {
+		/* First client, so just pass the enable map onwards as is */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			enable_map, &ts_start_ns, &ts_end_ns, NULL);
+	} else {
+		struct kbase_hwcnt_virtualizer_client *pos;
+
+		/* Make the scratch enable map the union of all enable maps */
+		kbase_hwcnt_enable_map_copy(
+			&hvirt->scratch_map, enable_map);
+		list_for_each_entry(pos, &hvirt->clients, node)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+
+		/* Set the counters with the new union enable map */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			&hvirt->scratch_map,
+			&ts_start_ns, &ts_end_ns,
+			&hvirt->scratch_buf);
+		/* Accumulate into only existing clients' accumulation bufs */
+		if (!errcode)
+			list_for_each_entry(pos, &hvirt->clients, node)
+				kbasep_hwcnt_virtualizer_client_accumulate(
+					pos, &hvirt->scratch_buf);
+	}
+	if (errcode)
+		goto error;
+
+	list_add(&hvcli->node, &hvirt->clients);
+	hvcli->hvirt = hvirt;
+	kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+	hvcli->has_accum = false;
+	hvcli->ts_start_ns = ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = ts_end_ns;
+
+	return 0;
+error:
+	hvirt->client_count -= 1;
+	if (hvirt->client_count == 0)
+		kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the
+ *                                          virtualizer.
+ * @hvirt:      Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:      Non-NULL pointer to the virtualizer client to remove.
+ */
+static void kbasep_hwcnt_virtualizer_client_remove(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	int errcode = 0;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	lockdep_assert_held(&hvirt->lock);
+
+	list_del(&hvcli->node);
+	hvirt->client_count -= 1;
+
+	if (hvirt->client_count == 0) {
+		/* Last client removed, so terminate the accumulator */
+		kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+	} else {
+		struct kbase_hwcnt_virtualizer_client *pos;
+		/* Make the scratch enable map the union of all enable maps */
+		kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
+		list_for_each_entry(pos, &hvirt->clients, node)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+		/* Set the counters with the new union enable map */
+		errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+			&hvirt->scratch_map,
+			&ts_start_ns, &ts_end_ns,
+			&hvirt->scratch_buf);
+		/* Accumulate into remaining clients' accumulation bufs */
+		if (!errcode)
+			list_for_each_entry(pos, &hvirt->clients, node)
+				kbasep_hwcnt_virtualizer_client_accumulate(
+					pos, &hvirt->scratch_buf);
+
+		/* Store the most recent dump time for rate limiting */
+		hvirt->ts_last_dump_ns = ts_end_ns;
+	}
+	WARN_ON(errcode);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ *                                                currently enabled counters,
+ *                                                and enable a new set of
+ *                                                counters that will be used for
+ *                                                subsequent dumps.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @enable_map:  Non-NULL pointer to the new counter enable map for the client.
+ *               Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *pos;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!enable_map);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(enable_map->metadata != hvirt->metadata);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	/* Make the scratch enable map the union of all enable maps */
+	kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+	list_for_each_entry(pos, &hvirt->clients, node)
+		/* Ignore the enable map of the selected client */
+		if (pos != hvcli)
+			kbase_hwcnt_enable_map_union(
+				&hvirt->scratch_map, &pos->enable_map);
+
+	/* Set the counters with the new union enable map */
+	errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum,
+		&hvirt->scratch_map, ts_start_ns, ts_end_ns,
+		&hvirt->scratch_buf);
+	if (errcode)
+		return errcode;
+
+	/* Accumulate into all accumulation bufs except the selected client's */
+	list_for_each_entry(pos, &hvirt->clients, node)
+		if (pos != hvcli)
+			kbasep_hwcnt_virtualizer_client_accumulate(
+				pos, &hvirt->scratch_buf);
+
+	/* Finally, write into the dump buf */
+	if (dump_buf) {
+		const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+		if (hvcli->has_accum) {
+			kbase_hwcnt_dump_buffer_accumulate(
+				&hvcli->accum_buf, src, &hvcli->enable_map);
+			src = &hvcli->accum_buf;
+		}
+		kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+	}
+	hvcli->has_accum = false;
+
+	/* Update the selected client's enable map */
+	kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+	/* Fix up the timestamps */
+	*ts_start_ns = hvcli->ts_start_ns;
+	hvcli->ts_start_ns = *ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = *ts_end_ns;
+
+	return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer *hvirt;
+
+	if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hvirt = hvcli->hvirt;
+
+	if ((enable_map->metadata != hvirt->metadata) ||
+	    (dump_buf && (dump_buf->metadata != hvirt->metadata)))
+		return -EINVAL;
+
+	mutex_lock(&hvirt->lock);
+
+	if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+		/*
+		 * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualize and just pass through the call
+		 * to the accumulator, saving a fair few copies and
+		 * accumulations.
+		 */
+		errcode = kbase_hwcnt_accumulator_set_counters(
+			hvirt->accum, enable_map,
+			ts_start_ns, ts_end_ns, dump_buf);
+
+		if (!errcode) {
+			/* Update the selected client's enable map */
+			kbase_hwcnt_enable_map_copy(
+				&hvcli->enable_map, enable_map);
+
+			/* Fix up the timestamps */
+			*ts_start_ns = hvcli->ts_start_ns;
+			hvcli->ts_start_ns = *ts_end_ns;
+
+			/* Store the most recent dump time for rate limiting */
+			hvirt->ts_last_dump_ns = *ts_end_ns;
+		}
+	} else {
+		/* Otherwise, do the full virtualize */
+		errcode = kbasep_hwcnt_virtualizer_client_set_counters(
+			hvirt, hvcli, enable_map,
+			ts_start_ns, ts_end_ns, dump_buf);
+	}
+
+	mutex_unlock(&hvirt->lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_set_counters);
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ *                                        currently enabled counters.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *pos;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	/* Perform the dump */
+	errcode = kbase_hwcnt_accumulator_dump(hvirt->accum,
+		ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
+	if (errcode)
+		return errcode;
+
+	/* Accumulate into all accumulation bufs except the selected client's */
+	list_for_each_entry(pos, &hvirt->clients, node)
+		if (pos != hvcli)
+			kbasep_hwcnt_virtualizer_client_accumulate(
+				pos, &hvirt->scratch_buf);
+
+	/* Finally, write into the dump buf */
+	if (dump_buf) {
+		const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+		if (hvcli->has_accum) {
+			kbase_hwcnt_dump_buffer_accumulate(
+				&hvcli->accum_buf, src, &hvcli->enable_map);
+			src = &hvcli->accum_buf;
+		}
+		kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+	}
+	hvcli->has_accum = false;
+
+	/* Fix up the timestamps */
+	*ts_start_ns = hvcli->ts_start_ns;
+	hvcli->ts_start_ns = *ts_end_ns;
+
+	/* Store the most recent dump time for rate limiting */
+	hvirt->ts_last_dump_ns = *ts_end_ns;
+
+	return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the
+ *                                           client's currently enabled counters
+ *                                           if it hasn't been rate limited,
+ *                                           otherwise return the client's most
+ *                                           recent accumulation.
+ * @hvirt:       Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	bool rate_limited = true;
+
+	WARN_ON(!hvirt);
+	WARN_ON(!hvcli);
+	WARN_ON(!ts_start_ns);
+	WARN_ON(!ts_end_ns);
+	WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+	lockdep_assert_held(&hvirt->lock);
+
+	if (hvirt->dump_threshold_ns == 0) {
+		/* Threshold == 0, so rate limiting disabled */
+		rate_limited = false;
+	} else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) {
+		/* Last dump was performed by this client, and dumps from an
+		 * individual client are never rate limited
+		 */
+		rate_limited = false;
+	} else {
+		const u64 ts_ns =
+			kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum);
+		const u64 time_since_last_dump_ns =
+			ts_ns - hvirt->ts_last_dump_ns;
+
+		/* Dump period equals or exceeds the threshold */
+		if (time_since_last_dump_ns >= hvirt->dump_threshold_ns)
+			rate_limited = false;
+	}
+
+	if (!rate_limited)
+		return kbasep_hwcnt_virtualizer_client_dump(
+			hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+
+	/* If we've gotten this far, the client must have something accumulated,
+	 * otherwise it is a logic error
+	 */
+	WARN_ON(!hvcli->has_accum);
+
+	if (dump_buf)
+		kbase_hwcnt_dump_buffer_copy(
+			dump_buf, &hvcli->accum_buf, &hvcli->enable_map);
+	hvcli->has_accum = false;
+
+	*ts_start_ns = hvcli->ts_start_ns;
+	*ts_end_ns = hvirt->ts_last_dump_ns;
+	hvcli->ts_start_ns = hvirt->ts_last_dump_ns;
+
+	return 0;
+}
+
+int kbase_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer *hvirt;
+
+	if (!hvcli || !ts_start_ns || !ts_end_ns)
+		return -EINVAL;
+
+	hvirt = hvcli->hvirt;
+
+	if (dump_buf && (dump_buf->metadata != hvirt->metadata))
+		return -EINVAL;
+
+	mutex_lock(&hvirt->lock);
+
+	if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+		/*
+		 * If there's only one client with no prior accumulation, we can
+		 * completely skip the virtualize and just pass through the call
+		 * to the accumulator, saving a fair few copies and
+		 * accumulations.
+		 */
+		errcode = kbase_hwcnt_accumulator_dump(
+			hvirt->accum, ts_start_ns, ts_end_ns, dump_buf);
+
+		if (!errcode) {
+			/* Fix up the timestamps */
+			*ts_start_ns = hvcli->ts_start_ns;
+			hvcli->ts_start_ns = *ts_end_ns;
+
+			/* Store the most recent dump time for rate limiting */
+			hvirt->ts_last_dump_ns = *ts_end_ns;
+		}
+	} else {
+		/* Otherwise, do the full virtualize */
+		errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+			hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+	}
+
+	mutex_unlock(&hvirt->lock);
+
+	return errcode;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_dump);
+
+int kbase_hwcnt_virtualizer_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+	int errcode;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+
+	if (!hvirt || !enable_map || !out_hvcli ||
+	    (enable_map->metadata != hvirt->metadata))
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_virtualizer_client_alloc(
+		hvirt->metadata, &hvcli);
+	if (errcode)
+		return errcode;
+
+	mutex_lock(&hvirt->lock);
+
+	errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map);
+
+	mutex_unlock(&hvirt->lock);
+
+	if (errcode) {
+		kbasep_hwcnt_virtualizer_client_free(hvcli);
+		return errcode;
+	}
+
+	*out_hvcli = hvcli;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_create);
+
+void kbase_hwcnt_virtualizer_client_destroy(
+	struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+	if (!hvcli)
+		return;
+
+	mutex_lock(&hvcli->hvirt->lock);
+
+	kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli);
+
+	mutex_unlock(&hvcli->hvirt->lock);
+
+	kbasep_hwcnt_virtualizer_client_free(hvcli);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_client_destroy);
+
+int kbase_hwcnt_virtualizer_init(
+	struct kbase_hwcnt_context *hctx,
+	u64 dump_threshold_ns,
+	struct kbase_hwcnt_virtualizer **out_hvirt)
+{
+	struct kbase_hwcnt_virtualizer *virt;
+	const struct kbase_hwcnt_metadata *metadata;
+
+	if (!hctx || !out_hvirt)
+		return -EINVAL;
+
+	metadata = kbase_hwcnt_context_metadata(hctx);
+	if (!metadata)
+		return -EINVAL;
+
+	virt = kzalloc(sizeof(*virt), GFP_KERNEL);
+	if (!virt)
+		return -ENOMEM;
+
+	virt->hctx = hctx;
+	virt->dump_threshold_ns = dump_threshold_ns;
+	virt->metadata = metadata;
+
+	mutex_init(&virt->lock);
+	INIT_LIST_HEAD(&virt->clients);
+
+	*out_hvirt = virt;
+	return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_init);
+
+void kbase_hwcnt_virtualizer_term(
+	struct kbase_hwcnt_virtualizer *hvirt)
+{
+	if (!hvirt)
+		return;
+
+	/* Non-zero client count implies client leak */
+	if (WARN_ON(hvirt->client_count != 0)) {
+		struct kbase_hwcnt_virtualizer_client *pos, *n;
+
+		list_for_each_entry_safe(pos, n, &hvirt->clients, node)
+			kbase_hwcnt_virtualizer_client_destroy(pos);
+	}
+
+	WARN_ON(hvirt->client_count != 0);
+	WARN_ON(hvirt->accum);
+
+	kfree(hvirt);
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_virtualizer_term);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h
new file mode 100644
index 0000000..8f628c3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwcnt_virtualizer.h
@@ -0,0 +1,145 @@
+/*
+ *
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * Hardware counter virtualizer API.
+ *
+ * Virtualizes a hardware counter context, so multiple clients can access
+ * a single hardware counter resource as though each was the exclusive user.
+ */
+
+#ifndef _KBASE_HWCNT_VIRTUALIZER_H_
+#define _KBASE_HWCNT_VIRTUALIZER_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_hwcnt_virtualizer_client;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer.
+ * @hctx:              Non-NULL pointer to the hardware counter context to
+ *                     virtualize.
+ * @dump_threshold_ns: Minimum period between accumulator dumps requested by
+ *                     different clients; a dump requested within this period
+ *                     of the previous dump is served from accumulated values
+ *                     instead. If 0, rate limiting will be disabled.
+ * @out_hvirt:         Non-NULL pointer to where the pointer to the created
+ *                     virtualizer will be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_init(
+	struct kbase_hwcnt_context *hctx,
+	u64 dump_threshold_ns,
+	struct kbase_hwcnt_virtualizer **out_hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer.
+ * @hvirt: Pointer to virtualizer to be terminated.
+ */
+void kbase_hwcnt_virtualizer_term(
+	struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by
+ *                                    the virtualizer, so related counter data
+ *                                    structures can be created.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_virtualizer_metadata(
+	struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client.
+ * @hvirt:      Non-NULL pointer to the hardware counter virtualizer.
+ * @enable_map: Non-NULL pointer to the enable map for the client. Must have the
+ *              same metadata as the virtualizer.
+ * @out_hvcli:  Non-NULL pointer to where the pointer to the created client will
+ *              be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_client_create(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	struct kbase_hwcnt_virtualizer_client **out_hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client.
+ * @hvcli: Pointer to the hardware counter client.
+ */
+void kbase_hwcnt_virtualizer_client_destroy(
+	struct kbase_hwcnt_virtualizer_client *hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ *                                               currently enabled counters, and
+ *                                               enable a new set of counters
+ *                                               that will be used for
+ *                                               subsequent dumps.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @enable_map:  Non-NULL pointer to the new counter enable map for the client.
+ *               Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_set_counters(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ *                                       currently enabled counters.
+ * @hvcli:       Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ *               be written out to on success.
+ * @ts_end_ns:   Non-NULL pointer where the end timestamp of the dump will
+ *               be written out to on success.
+ * @dump_buf:    Pointer to the buffer where the dump will be written out to on
+ *               success. If non-NULL, must have the same metadata as the
+ *               accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_dump(
+	struct kbase_hwcnt_virtualizer_client *hvcli,
+	u64 *ts_start_ns,
+	u64 *ts_end_ns,
+	struct kbase_hwcnt_dump_buffer *dump_buf);
+
+#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */
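
For reference, the intended call sequence against this API is: initialise the virtualizer once per hardware counter context, create one client per consumer, dump or reprogram counters through the client, then tear down in reverse order. The sketch below is illustrative only and is not part of the patch; the enable-map and dump-buffer helpers (kbase_hwcnt_enable_map_alloc/enable_all/free, kbase_hwcnt_dump_buffer_alloc/free) are assumed to come from mali_kbase_hwcnt_types.h, introduced elsewhere in this series.

```c
/* Illustrative sketch only, not part of the patch. Include paths assumed. */
#include <linux/ktime.h>
#include "mali_kbase_hwcnt_types.h"
#include "mali_kbase_hwcnt_virtualizer.h"

static int hwcnt_virt_example(struct kbase_hwcnt_context *hctx)
{
	struct kbase_hwcnt_virtualizer *hvirt;
	struct kbase_hwcnt_virtualizer_client *hvcli;
	struct kbase_hwcnt_enable_map map;
	struct kbase_hwcnt_dump_buffer buf;
	const struct kbase_hwcnt_metadata *md;
	u64 ts_start_ns, ts_end_ns;
	int err;

	/* Virtualize the context; rate limit cross-client dumps to 200us */
	err = kbase_hwcnt_virtualizer_init(hctx, 200 * NSEC_PER_USEC, &hvirt);
	if (err)
		return err;

	md = kbase_hwcnt_virtualizer_metadata(hvirt);

	/* Assumed helpers from mali_kbase_hwcnt_types.h */
	err = kbase_hwcnt_enable_map_alloc(md, &map);
	if (err)
		goto term_virt;
	kbase_hwcnt_enable_map_enable_all(&map);

	err = kbase_hwcnt_dump_buffer_alloc(md, &buf);
	if (err)
		goto free_map;

	/* One client per consumer; each sees an isolated counter stream */
	err = kbase_hwcnt_virtualizer_client_create(hvirt, &map, &hvcli);
	if (err)
		goto free_buf;

	/* Dump the counters accumulated since the client was created */
	err = kbase_hwcnt_virtualizer_client_dump(hvcli, &ts_start_ns,
						  &ts_end_ns, &buf);

	kbase_hwcnt_virtualizer_client_destroy(hvcli);
free_buf:
	kbase_hwcnt_dump_buffer_free(&buf);
free_map:
	kbase_hwcnt_enable_map_free(&map);
term_virt:
	kbase_hwcnt_virtualizer_term(hvirt);
	return err;
}
```

Note the fast path in the implementation: with a single client and no pending accumulation, dumps and counter reprogramming pass straight through to the accumulator, so the virtualizer adds no extra copies in the common single-consumer case.
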
diff --git a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
index bee2f3a..9b138e5 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_ioctl.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,7 @@
  * 11.1:
  * - Add BASE_MEM_TILER_ALIGN_TOP under base_mem_alloc_flags
  * 11.2:
- * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_SECURE,
+ * - KBASE_MEM_QUERY_FLAGS can return KBASE_REG_PF_GROW and KBASE_REG_PROTECTED,
  *   which some user-side clients prior to 11.2 might fault if they received
  *   them
  * 11.3:
@@ -62,9 +62,31 @@
  *   with one softjob.
  * 11.11:
  * - Added BASE_MEM_GPU_VA_SAME_4GB_PAGE under base_mem_alloc_flags
+ * 11.12:
+ * - Removed ioctl: KBASE_IOCTL_GET_PROFILING_CONTROLS
+ * 11.13:
+ * - New ioctl: KBASE_IOCTL_MEM_EXEC_INIT
+ * 11.14:
+ * - Add BASE_MEM_GROUP_ID_MASK, base_mem_group_id_get, base_mem_group_id_set
+ *   under base_mem_alloc_flags
+ * 11.15:
+ * - Added BASEP_CONTEXT_MMU_GROUP_ID_MASK under base_context_create_flags.
+ * - Require KBASE_IOCTL_SET_FLAGS before BASE_MEM_MAP_TRACKING_HANDLE can be
+ *   passed to mmap().
+ * 11.16:
+ * - Extended ioctl KBASE_IOCTL_MEM_SYNC to accept imported dma-buf.
+ * - Modified (backwards compatible) ioctl KBASE_IOCTL_MEM_IMPORT behavior for
+ *   dma-buf. Now, buffers are mapped on the GPU when first imported, no longer
+ *   requiring external resource or sticky resource tracking, unless
+ *   CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is enabled.
+ * 11.17:
+ * - Added BASE_JD_REQ_JOB_SLOT.
+ * - Reused padding field in base_jd_atom_v2 to pass job slot number.
+ * 11.18:
+ * - New ioctl: KBASE_IOCTL_GET_CPU_GPU_TIMEINFO
  */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 11
+#define BASE_UK_VERSION_MINOR 17
 
 /**
  * struct kbase_ioctl_version_check - Check version compatibility with kernel
@@ -332,6 +354,7 @@
  * @va_pages: Number of VA pages to reserve for JIT
  * @max_allocations: Maximum number of concurrent allocations
  * @trim_level: Level of JIT allocation trimming to perform on free (0 - 100%)
+ * @group_id: Group ID to be used for physical allocations
  * @padding: Currently unused, must be zero
  *
  * Note that depending on the VA size of the application and GPU, the value
@@ -341,7 +364,8 @@
 	__u64 va_pages;
 	__u8 max_allocations;
 	__u8 trim_level;
-	__u8 padding[6];
+	__u8 group_id;
+	__u8 padding[5];
 };
 
 #define KBASE_IOCTL_MEM_JIT_INIT \
@@ -543,21 +567,6 @@
 	_IOW(KBASE_IOCTL_TYPE, 25, struct kbase_ioctl_fence_validate)
 
 /**
- * struct kbase_ioctl_get_profiling_controls - Get the profiling controls
- * @count: The size of @buffer in u32 words
- * @buffer: The buffer to receive the profiling controls
- * @padding: Padding
- */
-struct kbase_ioctl_get_profiling_controls {
-	__u64 buffer;
-	__u32 count;
-	__u32 padding;
-};
-
-#define KBASE_IOCTL_GET_PROFILING_CONTROLS \
-	_IOW(KBASE_IOCTL_TYPE, 26, struct kbase_ioctl_get_profiling_controls)
-
-/**
  * struct kbase_ioctl_mem_profile_add - Provide profiling information to kernel
  * @buffer: Pointer to the information
  * @len: Length
@@ -686,6 +695,52 @@
 	_IOWR(KBASE_IOCTL_TYPE, 35, union kbase_ioctl_cinstr_gwt_dump)
 
 
+/**
+ * struct kbase_ioctl_mem_exec_init - Initialise the EXEC_VA memory zone
+ *
+ * @va_pages: Number of VA pages to reserve for EXEC_VA
+ */
+struct kbase_ioctl_mem_exec_init {
+	__u64 va_pages;
+};
+
+#define KBASE_IOCTL_MEM_EXEC_INIT \
+	_IOW(KBASE_IOCTL_TYPE, 38, struct kbase_ioctl_mem_exec_init)
+
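
The new ioctl is a plain _IOW taking only the number of VA pages to reserve. A hypothetical user-space sketch follows (not part of the patch); the device fd and the preceding version-check/set-flags handshake are assumed and not shown here.

```c
#include <stdio.h>
#include <sys/ioctl.h>
#include "mali_kbase_ioctl.h"	/* assumed user-space copy of this UAPI header */

/* Reserve an EXEC_VA zone of 4096 pages (16 MiB with 4 KiB pages) on an
 * already set-up kbase context; mali_fd is an open fd on the kbase device.
 */
static int reserve_exec_va(int mali_fd)
{
	struct kbase_ioctl_mem_exec_init exec_init = {
		.va_pages = 4096,
	};

	if (ioctl(mali_fd, KBASE_IOCTL_MEM_EXEC_INIT, &exec_init) < 0) {
		perror("KBASE_IOCTL_MEM_EXEC_INIT");
		return -1;
	}
	return 0;
}
```
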
+
+/**
+ * union kbase_ioctl_get_cpu_gpu_timeinfo - Request zero or more types of
+ *                                          cpu/gpu time (counter values)
+ *
+ * @request_flags: Bit-flags indicating the requested types.
+ * @paddings:      Unused; padding for size alignment with the @out struct.
+ * @sec:           Integer field of the monotonic time, unit in seconds.
+ * @nsec:          Fractional sec of the monotonic time, in nano-seconds.
+ * @padding:       Unused, for u64 alignment
+ * @timestamp:     System wide timestamp (counter) value.
+ * @cycle_counter: GPU cycle counter value.
+ *
+ * @in: Input parameters
+ * @out: Output parameters
+ *
+ */
+union kbase_ioctl_get_cpu_gpu_timeinfo {
+	struct {
+		__u32 request_flags;
+		__u32 paddings[7];
+	} in;
+	struct {
+		__u64 sec;
+		__u32 nsec;
+		__u32 padding;
+		__u64 timestamp;
+		__u64 cycle_counter;
+	} out;
+};
+
+#define KBASE_IOCTL_GET_CPU_GPU_TIMEINFO \
+	_IOWR(KBASE_IOCTL_TYPE, 50, union kbase_ioctl_get_cpu_gpu_timeinfo)
+
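
For the time-info query, user space fills in.request_flags with the time sources it wants and reads the monotonic time, GPU timestamp and cycle counter back from out. The sketch below is hypothetical and not part of the patch; the request flag bits live in the base UAPI headers, which are not part of this hunk, so they are passed in as an opaque value.

```c
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include "mali_kbase_ioctl.h"	/* assumed user-space copy of this UAPI header */

/* Query CPU/GPU time info; request_flags bits come from the base UAPI
 * (not shown in this patch), mali_fd is an open fd on the kbase device.
 */
static int query_timeinfo(int mali_fd, __u32 request_flags)
{
	union kbase_ioctl_get_cpu_gpu_timeinfo timeinfo;

	memset(&timeinfo, 0, sizeof(timeinfo));
	timeinfo.in.request_flags = request_flags;

	if (ioctl(mali_fd, KBASE_IOCTL_GET_CPU_GPU_TIMEINFO, &timeinfo) < 0) {
		perror("KBASE_IOCTL_GET_CPU_GPU_TIMEINFO");
		return -1;
	}

	printf("monotonic %llu.%09us, gpu timestamp %llu, cycle counter %llu\n",
	       (unsigned long long)timeinfo.out.sec,
	       (unsigned)timeinfo.out.nsec,
	       (unsigned long long)timeinfo.out.timestamp,
	       (unsigned long long)timeinfo.out.cycle_counter);
	return 0;
}
```
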
 /***************
  * test ioctls *
  ***************/
@@ -760,6 +815,21 @@
 
 #endif
 
+/* Customer extension range */
+#define KBASE_IOCTL_EXTRA_TYPE (KBASE_IOCTL_TYPE + 2)
+
+/* If the integration needs extra ioctls, add them here
+ * like this:
+ *
+ * struct my_ioctl_args {
+ *  ....
+ * }
+ *
+ * #define KBASE_IOCTL_MY_IOCTL \
+ *         _IOWR(KBASE_IOCTL_EXTRA_TYPE, 0, struct my_ioctl_args)
+ */
+
+
 /**********************************
  * Definitions for GPU properties *
  **********************************/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
index 4f6a346..02d5976 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -22,9 +22,7 @@
 
 
 
-#if defined(CONFIG_DMA_SHARED_BUFFER)
 #include <linux/dma-buf.h>
-#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
 #endif
@@ -35,7 +33,7 @@
 
 #include <mali_kbase_jm.h>
 #include <mali_kbase_hwaccess_jm.h>
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 
 #include "mali_kbase_dma_fence.h"
 
@@ -95,11 +93,7 @@
 			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 			return 0;
 		}
-		if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
-						  == BASE_JD_REQ_SOFT_REPLAY) {
-			if (!kbase_replay_process(katom))
-				katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
-		} else if (kbase_process_soft_job(katom) == 0) {
+		if (kbase_process_soft_job(katom) == 0) {
 			kbase_finish_soft_job(katom);
 			katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
 		}
@@ -199,7 +193,9 @@
 	u32 res_no;
 #ifdef CONFIG_MALI_DMA_FENCE
 	struct kbase_dma_fence_resv_info info = {
+		.resv_objs = NULL,
 		.dma_fence_resv_count = 0,
+		.dma_fence_excl_bitmap = NULL
 	};
 #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
 	/*
@@ -223,10 +219,8 @@
 		return -EINVAL;
 
 	katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
-	if (NULL == katom->extres) {
-		err_ret_val = -ENOMEM;
-		goto early_err_out;
-	}
+	if (!katom->extres)
+		return -ENOMEM;
 
 	/* copy user buffer to the end of our real buffer.
 	 * Make sure the struct sizes haven't changed in a way
@@ -270,14 +264,14 @@
 	/* need to keep the GPU VM locked while we set up UMM buffers */
 	kbase_gpu_vm_lock(katom->kctx);
 	for (res_no = 0; res_no < katom->nr_extres; res_no++) {
-		struct base_external_resource *res;
+		struct base_external_resource *res = &input_extres[res_no];
 		struct kbase_va_region *reg;
 		struct kbase_mem_phy_alloc *alloc;
+#ifdef CONFIG_MALI_DMA_FENCE
 		bool exclusive;
-
-		res = &input_extres[res_no];
 		exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
 				? true : false;
+#endif
 		reg = kbase_region_tracker_find_region_enclosing_address(
 				katom->kctx,
 				res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
@@ -288,7 +282,7 @@
 		}
 
 		if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
-				(reg->flags & KBASE_REG_SECURE)) {
+				(reg->flags & KBASE_REG_PROTECTED)) {
 			katom->atom_flags |= KBASE_KATOM_FLAG_PROTECTED;
 		}
 
@@ -408,14 +402,7 @@
 			KBASE_DEBUG_ASSERT(dep_atom->status !=
 						KBASE_JD_ATOM_STATE_UNUSED);
 
-			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
-					!= BASE_JD_REQ_SOFT_REPLAY) {
-				dep_atom->will_fail_event_code =
-					dep_atom->event_code;
-			} else {
-				dep_atom->status =
-					KBASE_JD_ATOM_STATE_COMPLETED;
-			}
+			dep_atom->will_fail_event_code = dep_atom->event_code;
 		}
 		other_dep_atom = (struct kbase_jd_atom *)
 			kbase_jd_katom_dep_atom(&dep_atom->dep[other_d]);
@@ -459,54 +446,6 @@
 
 KBASE_EXPORT_TEST_API(jd_resolve_dep);
 
-#if MALI_CUSTOMER_RELEASE == 0
-static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
-{
-	kbdev->force_replay_count++;
-
-	if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
-		kbdev->force_replay_count = 0;
-		katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
-
-		if (kbdev->force_replay_random)
-			kbdev->force_replay_limit =
-			   (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
-
-		dev_info(kbdev->dev, "force_replay : promoting to error\n");
-	}
-}
-
-/** Test to see if atom should be forced to fail.
- *
- * This function will check if an atom has a replay job as a dependent. If so
- * then it will be considered for forced failure. */
-static void jd_check_force_failure(struct kbase_jd_atom *katom)
-{
-	struct kbase_context *kctx = katom->kctx;
-	struct kbase_device *kbdev = kctx->kbdev;
-	int i;
-
-	if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
-	    (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
-		return;
-
-	for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
-		if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
-		    kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
-			struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
-
-			if ((dep_atom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) ==
-						     BASE_JD_REQ_SOFT_REPLAY &&
-			    (dep_atom->core_req & kbdev->force_replay_core_req)
-					     == kbdev->force_replay_core_req) {
-				jd_force_failure(kbdev, katom);
-				return;
-			}
-		}
-	}
-}
-#endif
-
 /**
  * is_dep_valid - Validate that a dependency is valid for early dependency
  *                submission
@@ -618,10 +557,6 @@
 
 	KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
 
-#if MALI_CUSTOMER_RELEASE == 0
-	jd_check_force_failure(katom);
-#endif
-
 	/* This is needed in case an atom is failed due to being invalid, this
 	 * can happen *before* the jobs that the atom depends on have completed */
 	for (i = 0; i < 2; i++) {
@@ -678,21 +613,9 @@
 			} else {
 				node->event_code = katom->event_code;
 
-				if ((node->core_req &
-					BASE_JD_REQ_SOFT_JOB_TYPE) ==
-					BASE_JD_REQ_SOFT_REPLAY) {
-					if (kbase_replay_process(node))
-						/* Don't complete this atom */
-						continue;
-				} else if (node->core_req &
+				if (node->core_req &
 							BASE_JD_REQ_SOFT_JOB) {
-					/* If this is a fence wait soft job
-					 * then remove it from the list of sync
-					 * waiters.
-					 */
-					if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
-						kbasep_remove_waiting_soft_job(node);
-
+					WARN_ON(!list_empty(&node->queue));
 					kbase_finish_soft_job(node);
 				}
 				node->status = KBASE_JD_ATOM_STATE_COMPLETED;
@@ -710,7 +633,7 @@
 		}
 
 		/* Register a completed job as a disjoint event when the GPU
-		 * is in a disjoint state (ie. being reset or replaying jobs).
+		 * is in a disjoint state (ie. being reset).
 		 */
 		kbase_disjoint_event_potential(kctx->kbdev);
 		if (completed_jobs_ctx)
@@ -788,6 +711,7 @@
 
 bool jd_submit_atom(struct kbase_context *kctx, const struct base_jd_atom_v2 *user_atom, struct kbase_jd_atom *katom)
 {
+	struct kbase_device *kbdev = kctx->kbdev;
 	struct kbase_jd_context *jctx = &kctx->jctx;
 	int queued = 0;
 	int i;
@@ -810,8 +734,8 @@
 	katom->extres = NULL;
 	katom->device_nr = user_atom->device_nr;
 	katom->jc = user_atom->jc;
-	katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
 	katom->core_req = user_atom->core_req;
+	katom->jobslot = user_atom->jobslot;
 	katom->atom_flags = 0;
 	katom->retry_count = 0;
 	katom->need_cache_flush_cores_retained = 0;
@@ -827,6 +751,7 @@
 
 	katom->age = kctx->age_count++;
 
+	INIT_LIST_HEAD(&katom->queue);
 	INIT_LIST_HEAD(&katom->jd_item);
 #ifdef CONFIG_MALI_DMA_FENCE
 	kbase_fence_dep_count_set(katom, -1);
@@ -851,11 +776,15 @@
 				 * back to user space. Do not record any
 				 * dependencies. */
 				KBASE_TLSTREAM_TL_NEW_ATOM(
+						kbdev,
 						katom,
 						kbase_jd_atom_id(kctx, katom));
 				KBASE_TLSTREAM_TL_RET_ATOM_CTX(
+						kbdev,
 						katom, kctx);
-				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+				KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(
+						kbdev,
+						katom,
 						TL_ATOM_STATE_IDLE);
 
 				ret = jd_done_nolock(katom, NULL);
@@ -894,23 +823,17 @@
 			katom->event_code = dep_atom->event_code;
 			katom->status = KBASE_JD_ATOM_STATE_QUEUED;
 
-			/* This atom is going through soft replay or
-			 * will be sent back to user space. Do not record any
-			 * dependencies. */
+			/* This atom will be sent back to user space.
+			 * Do not record any dependencies.
+			 */
 			KBASE_TLSTREAM_TL_NEW_ATOM(
+					kbdev,
 					katom,
 					kbase_jd_atom_id(kctx, katom));
-			KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
-			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom,
+			KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
+			KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom,
 					TL_ATOM_STATE_IDLE);
 
-			if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
-					 == BASE_JD_REQ_SOFT_REPLAY) {
-				if (kbase_replay_process(katom)) {
-					ret = false;
-					goto out;
-				}
-			}
 			will_fail = true;
 
 		} else {
@@ -971,32 +894,14 @@
 		sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
 	katom->sched_priority = sched_prio;
 
-	/* Create a new atom recording all dependencies it was set up with. */
+	/* Create a new atom. */
 	KBASE_TLSTREAM_TL_NEW_ATOM(
+			kbdev,
 			katom,
 			kbase_jd_atom_id(kctx, katom));
-	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_IDLE);
-	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(katom, katom->sched_priority);
-	KBASE_TLSTREAM_TL_RET_ATOM_CTX(katom, kctx);
-	for (i = 0; i < 2; i++)
-		if (BASE_JD_DEP_TYPE_INVALID != kbase_jd_katom_dep_type(
-					&katom->dep[i])) {
-			KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(
-					(void *)kbase_jd_katom_dep_atom(
-						&katom->dep[i]),
-					(void *)katom);
-		} else if (BASE_JD_DEP_TYPE_INVALID !=
-				user_atom->pre_dep[i].dependency_type) {
-			/* Resolved dependency. */
-			int dep_atom_number =
-				user_atom->pre_dep[i].atom_id;
-			struct kbase_jd_atom *dep_atom =
-				&jctx->atoms[dep_atom_number];
-
-			KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(
-					(void *)dep_atom,
-					(void *)katom);
-		}
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority);
+	KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx);
 
 	/* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
 	if (!katom->jc && (katom->core_req & BASE_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
@@ -1090,15 +995,7 @@
 	}
 #endif /* CONFIG_MALI_DMA_FENCE */
 
-	if ((katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE)
-						  == BASE_JD_REQ_SOFT_REPLAY) {
-		if (kbase_replay_process(katom))
-			ret = false;
-		else
-			ret = jd_done_nolock(katom, NULL);
-
-		goto out;
-	} else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+	if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
 		if (kbase_process_soft_job(katom) == 0) {
 			kbase_finish_soft_job(katom);
 			ret = jd_done_nolock(katom, NULL);
@@ -1216,7 +1113,7 @@
 				       jd_submit_atom(kctx, &user_atom, katom);
 
 		/* Register a completed job as a disjoint event when the GPU is in a disjoint state
-		 * (ie. being reset or replaying jobs).
+		 * (ie. being reset).
 		 */
 		kbase_disjoint_event_potential(kbdev);
 
@@ -1243,7 +1140,6 @@
 	struct kbasep_js_atom_retained_state katom_retained_state;
 	bool context_idle;
 	base_jd_core_req core_req = katom->core_req;
-	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
 
 	/* Soft jobs should never reach this function */
 	KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
@@ -1262,7 +1158,7 @@
 	 * Begin transaction on JD context and JS context
 	 */
 	mutex_lock(&jctx->lock);
-	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(katom, TL_ATOM_STATE_DONE);
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_DONE);
 	mutex_lock(&js_devdata->queue_mutex);
 	mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
 
@@ -1389,7 +1285,7 @@
 		mutex_unlock(&jctx->lock);
 	}
 
-	kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
+	kbase_backend_complete_wq_post_sched(kbdev, core_req);
 
 	if (context_idle)
 		kbase_pm_context_idle(kbdev);
@@ -1577,6 +1473,10 @@
 	flush_workqueue(kctx->dma_fence.wq);
 #endif
 
+#ifdef CONFIG_DEBUG_FS
+	kbase_debug_job_fault_kctx_unblock(kctx);
+#endif
+
 	kbase_jm_wait_for_zero_jobs(kctx);
 }
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
index 6b79d89..2fc21fd 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -78,7 +78,7 @@
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
 					"Sd(%u#%u: %s) ",
 #else
-					"Sd(%llu#%llu: %s) ",
+					"Sd(%llu#%u: %s) ",
 #endif
 					fence->context,
 					fence->seqno,
@@ -98,7 +98,7 @@
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
 					"Wd(%u#%u: %s) ",
 #else
-					"Wd(%llu#%llu: %s) ",
+					"Wd(%llu#%u: %s) ",
 #endif
 					fence->context,
 					fence->seqno,
@@ -190,9 +190,8 @@
 		kbasep_jd_debugfs_atom_deps(deps, atom);
 
 		seq_printf(sfile,
-				"%3u, %8x, %2u, %2u, %c%3u %c%3u, %20lld, ",
+				"%3u, %8x, %2u, %c%3u %c%3u, %20lld, ",
 				i, atom->core_req, atom->status,
-				atom->coreref_state,
 				deps[0].type, deps[0].id,
 				deps[1].type, deps[1].id,
 				start_timestamp);
@@ -222,6 +221,7 @@
 }
 
 static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+	.owner = THIS_MODULE,
 	.open = kbasep_jd_debugfs_atoms_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -230,7 +230,12 @@
 
 void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
 {
-	KBASE_DEBUG_ASSERT(kctx != NULL);
+	/* Caller already ensures this, but we keep the pattern for
+	 * maintenance safety.
+	 */
+	if (WARN_ON(!kctx) ||
+		WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
 
 	/* Expose all atoms */
 	debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
index ce0cb61..697bdef 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,7 +30,7 @@
 
 #include <linux/debugfs.h>
 
-#define MALI_JD_DEBUGFS_VERSION 2
+#define MALI_JD_DEBUGFS_VERSION 3
 
 /* Forward declarations */
 struct kbase_context;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
index 66a8444..77d9716 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,10 +27,7 @@
  */
 #include <mali_kbase.h>
 #include <mali_kbase_js.h>
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-#include <mali_kbase_gator.h>
-#endif
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 #include <mali_kbase_hw.h>
 #include <mali_kbase_ctx_sched.h>
 
@@ -365,6 +362,7 @@
 static void
 jsctx_tree_add(struct kbase_context *kctx, struct kbase_jd_atom *katom)
 {
+	struct kbase_device *kbdev = kctx->kbdev;
 	int prio = katom->sched_priority;
 	int js = katom->slot_nr;
 	struct jsctx_queue *queue = &kctx->jsctx_queue[prio][js];
@@ -386,6 +384,8 @@
 	/* Add new node and rebalance tree. */
 	rb_link_node(&katom->runnable_tree_node, parent, new);
 	rb_insert_color(&katom->runnable_tree_node, &queue->runnable_tree);
+
+	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_READY);
 }
 
 /**
@@ -1183,8 +1183,6 @@
 		goto out_unlock;
 	}
 
-	KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, TL_ATOM_STATE_READY);
-
 	enqueue_required = kbase_js_dep_resolved_submit(kctx, atom);
 
 	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
@@ -1482,10 +1480,7 @@
 				kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
 				kbasep_js_is_submit_allowed(js_devdata, kctx));
 
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-		kbase_trace_mali_mmu_as_released(kctx->as_nr);
-#endif
-		KBASE_TLSTREAM_TL_NRET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+		KBASE_TLSTREAM_TL_NRET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx);
 
 		kbase_backend_release_ctx_irq(kbdev, kctx);
 
@@ -1763,10 +1758,7 @@
 
 	kbdev->hwaccess.active_kctx[js] = kctx;
 
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-	kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
-#endif
-	KBASE_TLSTREAM_TL_RET_AS_CTX(&kbdev->as[kctx->as_nr], kctx);
+	KBASE_TLSTREAM_TL_RET_AS_CTX(kbdev, &kbdev->as[kctx->as_nr], kctx);
 
 	/* Cause any future waiter-on-termination to wait until the context is
 	 * descheduled */
@@ -1886,7 +1878,7 @@
 	} else {
 		/* Already scheduled in - We need to retain it to keep the
 		 * corresponding address space */
-		kbasep_js_runpool_retain_ctx(kbdev, kctx);
+		WARN_ON(!kbasep_js_runpool_retain_ctx(kbdev, kctx));
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
 		mutex_unlock(&js_devdata->queue_mutex);
 	}
@@ -2039,12 +2031,19 @@
 	    (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
 		return false;
 
+	if ((katom->core_req & BASE_JD_REQ_JOB_SLOT) &&
+			(katom->jobslot >= BASE_JM_MAX_NR_SLOTS))
+		return false;
+
 	return true;
 }
 
 static int kbase_js_get_slot(struct kbase_device *kbdev,
 				struct kbase_jd_atom *katom)
 {
+	if (katom->core_req & BASE_JD_REQ_JOB_SLOT)
+		return katom->jobslot;
+
 	if (katom->core_req & BASE_JD_REQ_FS)
 		return 0;
 
@@ -2259,9 +2258,8 @@
 	bool context_idle = false;
 	unsigned long flags;
 	base_jd_core_req core_req = katom->core_req;
-	enum kbase_atom_coreref_state coreref_state = katom->coreref_state;
 
-	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(katom);
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(kbdev, katom);
 
 	kbase_backend_complete_wq(kbdev, katom);
 
@@ -2344,12 +2342,14 @@
 	mutex_unlock(&js_devdata->queue_mutex);
 
 	katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+	WARN_ON(kbasep_js_has_atom_finished(&retained_state));
+
 	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
 							&retained_state);
 
 	kbase_js_sched_all(kbdev);
 
-	kbase_backend_complete_wq_post_sched(kbdev, core_req, coreref_state);
+	kbase_backend_complete_wq_post_sched(kbdev, core_req);
 }
 
 void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
@@ -2492,10 +2492,8 @@
 				katom->sched_priority);
 	}
 
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-	kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
-				katom->slot_nr), NULL, 0);
-#endif
+	KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, NULL,
+		katom->slot_nr, 0, TL_JS_EVENT_STOP);
 
 	kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
 
@@ -2702,7 +2700,6 @@
 	struct kbase_device *kbdev = kctx->kbdev;
 	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
 	struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
-	int js;
 
 	/*
 	 * Critical assumption: No more submission is possible outside of the
@@ -2758,6 +2755,7 @@
 	 */
 	if (!kbase_ctx_flag(kctx, KCTX_SCHEDULED)) {
 		unsigned long flags;
+		int js;
 
 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
@@ -2823,8 +2821,7 @@
 		/* Cancel any remaining running jobs for this kctx - if any.
 		 * Submit is disallowed which takes effect immediately, so no
 		 * more new jobs will appear after we do this. */
-		for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
-			kbase_job_slot_hardstop(kctx, js, NULL);
+		kbase_backend_jm_kill_running_jobs_from_kctx(kctx);
 
 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 		mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
index 6fd908a..1ff230c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -198,29 +198,6 @@
  * More commonly used public functions
  */
 
-void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
-{
-	bool runpool_state_changed = false;
-
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
-	KBASE_DEBUG_ASSERT(kctx != NULL);
-
-	if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
-		/* This context never submits, so don't track any scheduling attributes */
-		return;
-	}
-
-	/* Transfer attributes held in the context flags for contexts that have submit enabled */
-
-	/* ... More attributes can be added here ... */
-
-	/* The context should not have been scheduled yet, so ASSERT if this caused
-	 * runpool state changes (note that other threads *can't* affect the value
-	 * of runpool_state_changed, due to how it's calculated) */
-	KBASE_DEBUG_ASSERT(runpool_state_changed == false);
-	CSTD_UNUSED(runpool_state_changed);
-}
-
 void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
 {
 	bool runpool_state_changed;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
index be781e6..25fd397 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -46,14 +46,6 @@
  */
 
 /**
- * Set the initial attributes of a context (when context create flags are set)
- *
- * Requires:
- * - Hold the jsctx_mutex
- */
-void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
-
-/**
  * Retain all attributes of a context
  *
  * This occurs on scheduling in the context on the runpool (but after
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
index 7385daa..052a0b3 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -151,18 +151,19 @@
  */
 enum {
 	/*
-	 * In this mode, the context containing higher priority atoms will be
-	 * scheduled first and also the new runnable higher priority atoms can
-	 * preempt lower priority atoms currently running on the GPU, even if
-	 * they belong to a different context.
+	 * In this mode, higher priority atoms will be scheduled first,
+	 * regardless of the context they belong to. Newly-runnable higher
+	 * priority atoms can preempt lower priority atoms currently running on
+	 * the GPU, even if they belong to a different context.
 	 */
 	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,
 
 	/*
-	 * In this mode, the contexts are scheduled in round-robin fashion and
-	 * the new runnable higher priority atoms can preempt the lower priority
-	 * atoms currently running on the GPU, only if they belong to the same
-	 * context.
+	 * In this mode, the highest-priority atom will be chosen from each
+	 * context in turn using a round-robin algorithm, so priority only has
+	 * an effect within the context an atom belongs to. Newly-runnable
+	 * higher priority atoms can preempt the lower priority atoms currently
+	 * running on the GPU, but only if they belong to the same context.
 	 */
 	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
index 1c1f0d1..fa05f34 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -26,21 +26,25 @@
  * @file mali_kbase_mem.c
  * Base kernel memory APIs
  */
-#ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
-#endif				/* CONFIG_DMA_SHARED_BUFFER */
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/compat.h>
 #include <linux/version.h>
 #include <linux/log2.h>
+#ifdef CONFIG_OF
+#include <linux/of_platform.h>
+#endif
 
 #include <mali_kbase_config.h>
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
 #include <mali_kbase_cache_policy.h>
 #include <mali_kbase_hw.h>
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
+#include <mali_kbase_native_mgm.h>
+#include <mali_kbase_mem_pool_group.h>
+
 
 /* Forward declarations */
 static void free_partial_locked(struct kbase_context *kctx,
@@ -79,21 +83,28 @@
 {
 	struct rb_root *rbtree = NULL;
 
+	/* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
+	 * zone if this has been initialized.
+	 */
+	if (gpu_pfn >= kctx->exec_va_start)
+		rbtree = &kctx->reg_rbtree_exec;
+	else {
+		u64 same_va_end;
+
 #ifdef CONFIG_64BIT
-	if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+		if (kbase_ctx_flag(kctx, KCTX_COMPAT))
 #endif /* CONFIG_64BIT */
-		if (gpu_pfn >= KBASE_REG_ZONE_CUSTOM_VA_BASE)
-			rbtree = &kctx->reg_rbtree_custom;
-		else
-			rbtree = &kctx->reg_rbtree_same;
+			same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
 #ifdef CONFIG_64BIT
-	} else {
-		if (gpu_pfn >= kctx->same_va_end)
+		else
+			same_va_end = kctx->same_va_end;
+#endif /* CONFIG_64BIT */
+
+		if (gpu_pfn >= same_va_end)
 			rbtree = &kctx->reg_rbtree_custom;
 		else
 			rbtree = &kctx->reg_rbtree_same;
 	}
-#endif /* CONFIG_64BIT */
 
 	return rbtree;
 }
@@ -224,7 +235,6 @@
 			rbnode = rbnode->rb_right;
 		else
 			return reg;
-
 	}
 
 	return NULL;
@@ -487,8 +497,11 @@
 
 	/* The executable allocation from the SAME_VA zone would already have an
 	 * appropriately aligned GPU VA chosen for it.
+	 * Also the executable allocation from EXEC_VA zone doesn't need the
+	 * special alignment.
 	 */
-	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr) {
+	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
+	    ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
 		if (cpu_va_bits > gpu_pc_bits) {
 			align = max(align, (size_t)((1ULL << gpu_pc_bits)
 						>> PAGE_SHIFT));
@@ -533,6 +546,7 @@
 		struct kbase_va_region *reg,
 		u64 addr, size_t nr_pages, size_t align)
 {
+	struct device *const dev = kbdev->dev;
 	struct rb_root *rbtree = NULL;
 	struct kbase_va_region *tmp;
 	u64 gpu_pfn = addr >> PAGE_SHIFT;
@@ -551,8 +565,6 @@
 	 * which *must* be free.
 	 */
 	if (gpu_pfn) {
-		struct device *dev = kbdev->dev;
-
 		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
 
 		tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
@@ -595,7 +607,13 @@
 		if (tmp) {
 			err = kbase_insert_va_region_nolock(reg, tmp,
 							start_pfn, nr_pages);
+			if (unlikely(err)) {
+				dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
+					start_pfn, nr_pages);
+			}
 		} else {
+			dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n",
+				nr_pages, align_offset, align_mask);
 			err = -ENOMEM;
 		}
 	}
@@ -614,11 +632,15 @@
 	kctx->reg_rbtree_same = RB_ROOT;
 	kbase_region_tracker_insert(same_va_reg);
 
-	/* Although custom_va_reg doesn't always exist,
+	/* Although custom_va_reg and exec_va_reg don't always exist,
 	 * initialize unconditionally because of the mem_view debugfs
-	 * implementation which relies on this being empty.
+	 * implementation which relies on them being empty.
+	 *
+	 * The difference between the two is that the EXEC_VA region
+	 * is never initialized at this stage.
 	 */
 	kctx->reg_rbtree_custom = RB_ROOT;
+	kctx->reg_rbtree_exec = RB_ROOT;
 
 	if (custom_va_reg)
 		kbase_region_tracker_insert(custom_va_reg);
@@ -653,6 +675,7 @@
 {
 	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
 	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
+	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
 }
 
 void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
@@ -666,9 +689,6 @@
 			(size_t) kctx->kbdev->gpu_props.mmu.va_bits);
 }
 
-/**
- * Initialize the region tracker data structure.
- */
 int kbase_region_tracker_init(struct kbase_context *kctx)
 {
 	struct kbase_va_region *same_va_reg;
@@ -718,12 +738,17 @@
 			goto fail_free_same_va;
 		}
 #ifdef CONFIG_64BIT
+	} else {
+		custom_va_size = 0;
 	}
 #endif
 
 	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
 
 	kctx->same_va_end = same_va_pages + 1;
+	kctx->gpu_va_end = kctx->same_va_end + custom_va_size;
+	kctx->exec_va_start = U64_MAX;
+	kctx->jit_va = false;
 
 
 	kbase_gpu_vm_unlock(kctx);
@@ -742,13 +767,12 @@
 {
 	struct kbase_va_region *same_va;
 	struct kbase_va_region *custom_va_reg;
-	u64 same_va_bits = kbase_get_same_va_bits(kctx);
-	u64 total_va_size;
-	int err;
 
-	total_va_size = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1;
+	lockdep_assert_held(&kctx->reg_lock);
 
-	kbase_gpu_vm_lock(kctx);
+	/* First verify that a JIT_VA zone has not been created already. */
+	if (kctx->jit_va)
+		return -EINVAL;
 
 	/*
 	 * Modify the same VA free region after creation. Be careful to ensure
@@ -757,23 +781,11 @@
 	 */
 	same_va = kbase_region_tracker_find_region_base_address(kctx,
 			PAGE_SIZE);
-	if (!same_va) {
-		err = -ENOMEM;
-		goto fail_unlock;
-	}
+	if (!same_va)
+		return -ENOMEM;
 
-	/* The region flag or region size has changed since creation so bail. */
-	if ((!(same_va->flags & KBASE_REG_FREE)) ||
-			(same_va->nr_pages != total_va_size)) {
-		err = -ENOMEM;
-		goto fail_unlock;
-	}
-
-	if (same_va->nr_pages < jit_va_pages ||
-			kctx->same_va_end < jit_va_pages) {
-		err = -ENOMEM;
-		goto fail_unlock;
-	}
+	if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages)
+		return -ENOMEM;
 
 	/* It's safe to adjust the same VA zone now */
 	same_va->nr_pages -= jit_va_pages;
@@ -788,77 +800,193 @@
 				jit_va_pages,
 				KBASE_REG_ZONE_CUSTOM_VA);
 
-	if (!custom_va_reg) {
-		/*
-		 * The context will be destroyed if we fail here so no point
-		 * reverting the change we made to same_va.
-		 */
-		err = -ENOMEM;
-		goto fail_unlock;
-	}
+	/*
+	 * The context will be destroyed if we fail here so no point
+	 * reverting the change we made to same_va.
+	 */
+	if (!custom_va_reg)
+		return -ENOMEM;
 
 	kbase_region_tracker_insert(custom_va_reg);
-
-	kbase_gpu_vm_unlock(kctx);
 	return 0;
-
-fail_unlock:
-	kbase_gpu_vm_unlock(kctx);
-	return err;
 }
 #endif
 
 int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
-		u8 max_allocations, u8 trim_level)
+		u8 max_allocations, u8 trim_level, int group_id)
 {
+	int err = 0;
+
 	if (trim_level > 100)
 		return -EINVAL;
 
-	kctx->jit_max_allocations = max_allocations;
-	kctx->trim_level = trim_level;
+	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
+		WARN_ON(group_id < 0))
+		return -EINVAL;
+
+	kbase_gpu_vm_lock(kctx);
 
 #ifdef CONFIG_64BIT
 	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
-		return kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
+		err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
 #endif
 	/*
 	 * Nothing to do for 32-bit clients, JIT uses the existing
 	 * custom VA zone.
 	 */
-	return 0;
+
+	if (!err) {
+		kctx->jit_max_allocations = max_allocations;
+		kctx->trim_level = trim_level;
+		kctx->jit_va = true;
+		kctx->jit_group_id = group_id;
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	return err;
+}
+
+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
+{
+	struct kbase_va_region *shrinking_va_reg;
+	struct kbase_va_region *exec_va_reg;
+	u64 exec_va_start, exec_va_base_addr;
+	int err;
+
+	/* The EXEC_VA zone shall be created by making space at the end of the
+	 * address space. First verify that the number of EXEC_VA pages
+	 * requested by the client is reasonable, then make sure that it is
+	 * not greater than the address space itself before calculating the
+	 * base address of the new zone.
+	 */
+	if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
+		return -EINVAL;
+
+	kbase_gpu_vm_lock(kctx);
+
+	/* First verify that a JIT_VA zone has not been created already. */
+	if (kctx->jit_va) {
+		err = -EPERM;
+		goto exit_unlock;
+	}
+
+	if (exec_va_pages > kctx->gpu_va_end) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	exec_va_start = kctx->gpu_va_end - exec_va_pages;
+	exec_va_base_addr = exec_va_start << PAGE_SHIFT;
+
+	shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+			exec_va_base_addr);
+	if (!shrinking_va_reg) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	/* Make sure that the EXEC_VA region is still uninitialized */
+	if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) ==
+			KBASE_REG_ZONE_EXEC_VA) {
+		err = -EPERM;
+		goto exit_unlock;
+	}
+
+	if (shrinking_va_reg->nr_pages <= exec_va_pages) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
+			exec_va_start,
+			exec_va_pages,
+			KBASE_REG_ZONE_EXEC_VA);
+	if (!exec_va_reg) {
+		err = -ENOMEM;
+		goto exit_unlock;
+	}
+
+	shrinking_va_reg->nr_pages -= exec_va_pages;
+#ifdef CONFIG_64BIT
+	if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+		kctx->same_va_end -= exec_va_pages;
+#endif
+	kctx->exec_va_start = exec_va_start;
+
+	kbase_region_tracker_insert(exec_va_reg);
+	err = 0;
+
+exit_unlock:
+	kbase_gpu_vm_unlock(kctx);
+	return err;
 }
 
 
 int kbase_mem_init(struct kbase_device *kbdev)
 {
+	int err = 0;
 	struct kbasep_mem_device *memdev;
-	int ret;
+#ifdef CONFIG_OF
+	struct device_node *mgm_node = NULL;
+#endif
 
 	KBASE_DEBUG_ASSERT(kbdev);
 
 	memdev = &kbdev->memdev;
-	kbdev->mem_pool_max_size_default = KBASE_MEM_POOL_MAX_SIZE_KCTX;
+
+	kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
+		KBASE_MEM_POOL_MAX_SIZE_KCTX);
 
 	/* Initialize memory usage */
 	atomic_set(&memdev->used_pages, 0);
 
-	ret = kbase_mem_pool_init(&kbdev->mem_pool,
-			KBASE_MEM_POOL_MAX_SIZE_KBDEV,
-			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
-			kbdev,
-			NULL);
-	if (ret)
-		return ret;
+	kbdev->mgm_dev = &kbase_native_mgm_dev;
 
-	ret = kbase_mem_pool_init(&kbdev->lp_mem_pool,
-			(KBASE_MEM_POOL_MAX_SIZE_KBDEV >> 9),
-			KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
-			kbdev,
-			NULL);
-	if (ret)
-		kbase_mem_pool_term(&kbdev->mem_pool);
+#ifdef CONFIG_OF
+	/* Check whether a platform-specific memory group manager is
+	 * configured and available.
+	 */
+	mgm_node = of_parse_phandle(kbdev->dev->of_node,
+		"physical-memory-group-manager", 0);
+	if (!mgm_node) {
+		dev_info(kbdev->dev,
+			"No memory group manager is configured\n");
+	} else {
+		struct platform_device *const pdev =
+			of_find_device_by_node(mgm_node);
 
-	return ret;
+		if (!pdev) {
+			dev_err(kbdev->dev,
+				"The configured memory group manager was not found\n");
+		} else {
+			kbdev->mgm_dev = platform_get_drvdata(pdev);
+			if (!kbdev->mgm_dev) {
+				dev_info(kbdev->dev,
+					"Memory group manager is not ready\n");
+				err = -EPROBE_DEFER;
+			} else if (!try_module_get(kbdev->mgm_dev->owner)) {
+				dev_err(kbdev->dev,
+					"Failed to get memory group manger module\n");
+				err = -ENODEV;
+				kbdev->mgm_dev = NULL;
+			}
+		}
+		of_node_put(mgm_node);
+	}
+#endif
+
+	if (likely(!err)) {
+		struct kbase_mem_pool_group_config mem_pool_defaults;
+
+		kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
+			KBASE_MEM_POOL_MAX_SIZE_KBDEV);
+
+		err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev,
+			&mem_pool_defaults, NULL);
+	}
+
+	return err;
 }
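
For context, the lookup above expects a provider driver to have published its group manager through platform driver data before kbase probes (hence the -EPROBE_DEFER path). A minimal, hypothetical provider probe could look like the sketch below; the struct name memory_group_manager_device and its contents are assumptions based on the <linux/memory_group_manager.h> include and the owner dereference in kbase_mem_init(), and example_mgm_probe is not a function from this patch.

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/memory_group_manager.h>

static int example_mgm_probe(struct platform_device *pdev)
{
	struct memory_group_manager_device *mgm_dev;

	mgm_dev = devm_kzalloc(&pdev->dev, sizeof(*mgm_dev), GFP_KERNEL);
	if (!mgm_dev)
		return -ENOMEM;

	/* try_module_get() in kbase_mem_init() relies on this being set. */
	mgm_dev->owner = THIS_MODULE;
	/* ... populate the group manager ops here ... */

	/* kbase_mem_init() follows the "physical-memory-group-manager"
	 * phandle and reads this pointer back with platform_get_drvdata().
	 */
	platform_set_drvdata(pdev, mgm_dev);
	return 0;
}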
 
 void kbase_mem_halt(struct kbase_device *kbdev)
@@ -879,8 +1007,10 @@
 	if (pages != 0)
 		dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
 
-	kbase_mem_pool_term(&kbdev->mem_pool);
-	kbase_mem_pool_term(&kbdev->lp_mem_pool);
+	kbase_mem_pool_group_term(&kbdev->mem_pools);
+
+	if (kbdev->mgm_dev)
+		module_put(kbdev->mgm_dev->owner);
 }
 
 KBASE_EXPORT_TEST_API(kbase_mem_term);
@@ -948,6 +1078,10 @@
 		kctx = container_of(rbtree, struct kbase_context,
 				reg_rbtree_same);
 		break;
+	case KBASE_REG_ZONE_EXEC_VA:
+		kctx = container_of(rbtree, struct kbase_context,
+				reg_rbtree_exec);
+		break;
 	default:
 		WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
 		break;
@@ -977,6 +1111,7 @@
 		if (WARN_ON(kbase_is_region_invalid(reg)))
 			return;
 
+
 		mutex_lock(&kctx->jit_evict_lock);
 
 		/*
@@ -1041,8 +1176,10 @@
 	unsigned long attr;
 	unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
 	unsigned long gwt_mask = ~0;
+	int group_id;
+	struct kbase_mem_phy_alloc *alloc;
 
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 	if (kctx->gwt_enabled)
 		gwt_mask = ~KBASE_REG_GPU_WR;
 #endif
@@ -1060,12 +1197,12 @@
 	if (err)
 		return err;
 
-	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
-		u64 stride;
-		struct kbase_mem_phy_alloc *alloc;
+	alloc = reg->gpu_alloc;
+	group_id = alloc->group_id;
 
-		alloc = reg->gpu_alloc;
-		stride = alloc->imported.alias.stride;
+	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		u64 const stride = alloc->imported.alias.stride;
+
 		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
 		for (i = 0; i < alloc->imported.alias.nents; i++) {
 			if (alloc->imported.alias.aliased[i].alloc) {
@@ -1075,7 +1212,8 @@
 						alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
 						alloc->imported.alias.aliased[i].length,
 						reg->flags & gwt_mask,
-						kctx->as_nr);
+						kctx->as_nr,
+						group_id);
 				if (err)
 					goto bad_insert;
 
@@ -1085,7 +1223,8 @@
 					reg->start_pfn + i * stride,
 					kctx->aliasing_sink_page,
 					alloc->imported.alias.aliased[i].length,
-					(reg->flags & mask & gwt_mask) | attr);
+					(reg->flags & mask & gwt_mask) | attr,
+					group_id);
 
 				if (err)
 					goto bad_insert;
@@ -1098,29 +1237,50 @@
 				kbase_get_gpu_phy_pages(reg),
 				kbase_reg_current_backed_size(reg),
 				reg->flags & gwt_mask,
-				kctx->as_nr);
+				kctx->as_nr,
+				group_id);
 		if (err)
 			goto bad_insert;
-		kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+		kbase_mem_phy_alloc_gpu_mapped(alloc);
+	}
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD &&
+	    !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
+	    reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
+	    reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+		/* For padded imported dma-buf memory, map the dummy aliasing
+		 * page from the end of the dma-buf pages to the end of the
+		 * region, using a read-only mapping.
+		 *
+		 * Only map when it's imported dma-buf memory that is currently
+		 * mapped.
+		 *
+		 * Assume reg->gpu_alloc->nents is the number of actual pages
+		 * in the dma-buf memory.
+		 */
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + reg->gpu_alloc->nents,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - reg->gpu_alloc->nents,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR,
+				KBASE_MEM_GROUP_SINK);
+		if (err)
+			goto bad_insert;
 	}
 
 	return err;
 
 bad_insert:
-	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
-		u64 stride;
+	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+				 reg->start_pfn, reg->nr_pages,
+				 kctx->as_nr);
 
-		stride = reg->gpu_alloc->imported.alias.stride;
-		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+	if (alloc->type == KBASE_MEM_TYPE_ALIAS) {
+		KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
 		while (i--)
-			if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
-				kbase_mmu_teardown_pages(kctx->kbdev,
-					&kctx->mmu,
-					reg->start_pfn + (i * stride),
-					reg->gpu_alloc->imported.alias.aliased[i].length,
-					kctx->as_nr);
-				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
-			}
+			if (alloc->imported.alias.aliased[i].alloc)
+				kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc);
 	}
 
 	kbase_remove_va_region(reg);
@@ -1135,44 +1295,58 @@
 
 int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
 {
-	int err;
+	int err = 0;
+	size_t i;
 
 	if (reg->start_pfn == 0)
 		return 0;
 
-	if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
-		size_t i;
+	if (!reg->gpu_alloc)
+		return -EINVAL;
 
+	/* Tear down GPU page tables, depending on memory type. */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_ALIAS: /* Fall-through */
+	case KBASE_MEM_TYPE_IMPORTED_UMM:
 		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
 				reg->start_pfn, reg->nr_pages, kctx->as_nr);
+		break;
+	default:
+		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
+			reg->start_pfn, kbase_reg_current_backed_size(reg),
+			kctx->as_nr);
+		break;
+	}
+
+	/* Update tracking, and other cleanup, depending on memory type. */
+	switch (reg->gpu_alloc->type) {
+	case KBASE_MEM_TYPE_ALIAS:
 		KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
 		for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
 			if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
 				kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
-	} else {
-		err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
-			reg->start_pfn, kbase_reg_current_backed_size(reg),
-			kctx->as_nr);
-		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
-	}
+		break;
+	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+			struct kbase_alloc_import_user_buf *user_buf =
+				&reg->gpu_alloc->imported.user_buf;
 
-	if (reg->gpu_alloc && reg->gpu_alloc->type ==
-			KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
-		struct kbase_alloc_import_user_buf *user_buf =
-			&reg->gpu_alloc->imported.user_buf;
+			if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+				user_buf->current_mapping_usage_count &=
+					~PINNED_ON_IMPORT;
 
-		if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
-			user_buf->current_mapping_usage_count &=
-				~PINNED_ON_IMPORT;
-
-			kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
-					(reg->flags & KBASE_REG_GPU_WR));
+				/* The allocation could still have active mappings. */
+				if (user_buf->current_mapping_usage_count == 0) {
+					kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
+						(reg->flags & KBASE_REG_GPU_WR));
+				}
+			}
 		}
+		/* Fall-through */
+	default:
+		kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+		break;
 	}
 
-	if (err)
-		return err;
-
 	return err;
 }
 
@@ -1350,8 +1524,18 @@
 		goto out_unlock;
 	}
 
-	if (!(reg->flags & KBASE_REG_CPU_CACHED) ||
-			kbase_mem_is_imported(reg->gpu_alloc->type))
+	/*
+	 * Handle imported memory before checking for KBASE_REG_CPU_CACHED. The
+	 * CPU mapping cacheability is defined by the owner of the imported
+	 * memory, not by kbase, so we must assume that any imported memory
+	 * may be cached.
+	 */
+	if (kbase_mem_is_imported(reg->gpu_alloc->type)) {
+		err = kbase_mem_do_sync_imported(kctx, reg, sync_fn);
+		goto out_unlock;
+	}
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED))
 		goto out_unlock;
 
 	start = (uintptr_t)sset->user_addr;
@@ -1453,8 +1637,8 @@
 	KBASE_DEBUG_ASSERT(NULL != reg);
 	lockdep_assert_held(&kctx->reg_lock);
 
-	if (reg->flags & KBASE_REG_JIT) {
-		dev_warn(kctx->kbdev->dev, "Attempt to free JIT memory!\n");
+	if (reg->flags & KBASE_REG_NO_USER_FREE) {
+		dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
 		return -EINVAL;
 	}
 
@@ -1629,9 +1813,12 @@
 			KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
 	}
 
-	if (flags & BASE_MEM_PERMANENT_KERNEL_MAPPING)
+	if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING)
 		reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
 
+	if (flags & BASEP_MEM_NO_USER_FREE)
+		reg->flags |= KBASE_REG_NO_USER_FREE;
+
 	if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
 		reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
 
@@ -1645,10 +1832,14 @@
 	size_t nr_left = nr_pages_requested;
 	int res;
 	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
 	struct tagged_addr *tp;
 
-	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
+	if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) ||
+	    WARN_ON(alloc->imported.native.kctx == NULL) ||
+	    WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		return -EINVAL;
+	}
 
 	if (alloc->reg) {
 		if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
@@ -1656,14 +1847,15 @@
 	}
 
 	kctx = alloc->imported.native.kctx;
+	kbdev = kctx->kbdev;
 
 	if (nr_pages_requested == 0)
 		goto done; /*nothing to do*/
 
-	new_page_count = kbase_atomic_add_pages(
-			nr_pages_requested, &kctx->used_pages);
-	kbase_atomic_add_pages(nr_pages_requested,
-			       &kctx->kbdev->memdev.used_pages);
+	new_page_count = atomic_add_return(
+		nr_pages_requested, &kctx->used_pages);
+	atomic_add(nr_pages_requested,
+		&kctx->kbdev->memdev.used_pages);
 
 	/* Increase mm counters before we allocate pages so that this
 	 * allocation is visible to the OOM killer */
@@ -1678,10 +1870,11 @@
 	if (nr_left >= (SZ_2M / SZ_4K)) {
 		int nr_lp = nr_left / (SZ_2M / SZ_4K);
 
-		res = kbase_mem_pool_alloc_pages(&kctx->lp_mem_pool,
-						 nr_lp * (SZ_2M / SZ_4K),
-						 tp,
-						 true);
+		res = kbase_mem_pool_alloc_pages(
+			&kctx->mem_pools.large[alloc->group_id],
+			 nr_lp * (SZ_2M / SZ_4K),
+			 tp,
+			 true);
 
 		if (res > 0) {
 			nr_left -= res;
@@ -1727,10 +1920,15 @@
 			do {
 				int err;
 
-				np = kbase_mem_pool_alloc(&kctx->lp_mem_pool);
+				np = kbase_mem_pool_alloc(
+					&kctx->mem_pools.large[
+						alloc->group_id]);
 				if (np)
 					break;
-				err = kbase_mem_pool_grow(&kctx->lp_mem_pool, 1);
+
+				err = kbase_mem_pool_grow(
+					&kctx->mem_pools.large[alloc->group_id],
+					1);
 				if (err)
 					break;
 			} while (1);
@@ -1742,7 +1940,11 @@
 
 				sa = kmalloc(sizeof(*sa), GFP_KERNEL);
 				if (!sa) {
-					kbase_mem_pool_free(&kctx->lp_mem_pool, np, false);
+					kbase_mem_pool_free(
+						&kctx->mem_pools.large[
+							alloc->group_id],
+						np,
+						false);
 					goto no_new_partial;
 				}
 
@@ -1771,15 +1973,15 @@
 #endif
 
 	if (nr_left) {
-		res = kbase_mem_pool_alloc_pages(&kctx->mem_pool,
-						 nr_left,
-						 tp,
-						 false);
+		res = kbase_mem_pool_alloc_pages(
+			&kctx->mem_pools.small[alloc->group_id],
+			nr_left, tp, false);
 		if (res <= 0)
 			goto alloc_failed;
 	}
 
 	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
 			kctx->id,
 			(u64)new_page_count);
 
@@ -1795,17 +1997,17 @@
 		alloc->nents += nr_pages_to_free;
 
 		kbase_process_page_usage_inc(kctx, nr_pages_to_free);
-		kbase_atomic_add_pages(nr_pages_to_free, &kctx->used_pages);
-		kbase_atomic_add_pages(nr_pages_to_free,
-			       &kctx->kbdev->memdev.used_pages);
+		atomic_add(nr_pages_to_free, &kctx->used_pages);
+		atomic_add(nr_pages_to_free,
+			&kctx->kbdev->memdev.used_pages);
 
 		kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
 	}
 
 	kbase_process_page_usage_dec(kctx, nr_pages_requested);
-	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
-	kbase_atomic_sub_pages(nr_pages_requested,
-			       &kctx->kbdev->memdev.used_pages);
+	atomic_sub(nr_pages_requested, &kctx->used_pages);
+	atomic_sub(nr_pages_requested,
+		&kctx->kbdev->memdev.used_pages);
 
 invalid_request:
 	return -ENOMEM;
@@ -1820,6 +2022,7 @@
 	size_t nr_left = nr_pages_requested;
 	int res;
 	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
 	struct tagged_addr *tp;
 	struct tagged_addr *new_pages = NULL;
 
@@ -1838,16 +2041,17 @@
 	}
 
 	kctx = alloc->imported.native.kctx;
+	kbdev = kctx->kbdev;
 
 	lockdep_assert_held(&kctx->mem_partials_lock);
 
 	if (nr_pages_requested == 0)
 		goto done; /*nothing to do*/
 
-	new_page_count = kbase_atomic_add_pages(
-			nr_pages_requested, &kctx->used_pages);
-	kbase_atomic_add_pages(nr_pages_requested,
-			       &kctx->kbdev->memdev.used_pages);
+	new_page_count = atomic_add_return(
+		nr_pages_requested, &kctx->used_pages);
+	atomic_add(nr_pages_requested,
+		&kctx->kbdev->memdev.used_pages);
 
 	/* Increase mm counters before we allocate pages so that this
 	 * allocation is visible to the OOM killer
@@ -1953,6 +2157,7 @@
 #endif
 
 	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
 			kctx->id,
 			(u64)new_page_count);
 
@@ -1998,15 +2203,15 @@
 	}
 
 	kbase_process_page_usage_dec(kctx, nr_pages_requested);
-	kbase_atomic_sub_pages(nr_pages_requested, &kctx->used_pages);
-	kbase_atomic_sub_pages(nr_pages_requested,
-			       &kctx->kbdev->memdev.used_pages);
+	atomic_sub(nr_pages_requested, &kctx->used_pages);
+	atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages);
 
 invalid_request:
 	return NULL;
 }
 
-static void free_partial(struct kbase_context *kctx, struct tagged_addr tp)
+static void free_partial(struct kbase_context *kctx, int group_id,
+		struct tagged_addr tp)
 {
 	struct page *p, *head_page;
 	struct kbase_sub_alloc *sa;
@@ -2018,7 +2223,10 @@
 	clear_bit(p - head_page, sa->sub_pages);
 	if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
 		list_del(&sa->link);
-		kbase_mem_pool_free(&kctx->lp_mem_pool, head_page, true);
+		kbase_mem_pool_free(
+			&kctx->mem_pools.large[group_id],
+			head_page,
+			true);
 		kfree(sa);
 	} else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
 		   SZ_2M / SZ_4K - 1) {
@@ -2033,15 +2241,19 @@
 	size_t nr_pages_to_free)
 {
 	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	bool syncback;
 	bool reclaimed = (alloc->evicted != 0);
 	struct tagged_addr *start_free;
 	int new_page_count __maybe_unused;
 	size_t freed = 0;
 
-	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
-	KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
-	KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+	if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) ||
+	    WARN_ON(alloc->imported.native.kctx == NULL) ||
+	    WARN_ON(alloc->nents < nr_pages_to_free) ||
+	    WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		return -EINVAL;
+	}
 
 	/* early out if nothing to do */
 	if (0 == nr_pages_to_free)
@@ -2063,16 +2275,17 @@
 			/* This is a 2MB entry, so free all the 512 pages that
 			 * it points to
 			 */
-			kbase_mem_pool_free_pages(&kctx->lp_mem_pool,
-					512,
-					start_free,
-					syncback,
-					reclaimed);
+			kbase_mem_pool_free_pages(
+				&kctx->mem_pools.large[alloc->group_id],
+				512,
+				start_free,
+				syncback,
+				reclaimed);
 			nr_pages_to_free -= 512;
 			start_free += 512;
 			freed += 512;
 		} else if (is_partial(*start_free)) {
-			free_partial(kctx, *start_free);
+			free_partial(kctx, alloc->group_id, *start_free);
 			nr_pages_to_free--;
 			start_free++;
 			freed++;
@@ -2081,16 +2294,17 @@
 
 			local_end_free = start_free;
 			while (nr_pages_to_free &&
-			       !is_huge(*local_end_free) &&
-			       !is_partial(*local_end_free)) {
+				!is_huge(*local_end_free) &&
+				!is_partial(*local_end_free)) {
 				local_end_free++;
 				nr_pages_to_free--;
 			}
-			kbase_mem_pool_free_pages(&kctx->mem_pool,
-					local_end_free - start_free,
-					start_free,
-					syncback,
-					reclaimed);
+			kbase_mem_pool_free_pages(
+				&kctx->mem_pools.small[alloc->group_id],
+				local_end_free - start_free,
+				start_free,
+				syncback,
+				reclaimed);
 			freed += local_end_free - start_free;
 			start_free += local_end_free - start_free;
 		}
@@ -2104,14 +2318,15 @@
 	 */
 	if (!reclaimed) {
 		kbase_process_page_usage_dec(kctx, freed);
-		new_page_count = kbase_atomic_sub_pages(freed,
-							&kctx->used_pages);
-		kbase_atomic_sub_pages(freed,
-				       &kctx->kbdev->memdev.used_pages);
+		new_page_count = atomic_sub_return(freed,
+			&kctx->used_pages);
+		atomic_sub(freed,
+			&kctx->kbdev->memdev.used_pages);
 
 		KBASE_TLSTREAM_AUX_PAGESALLOC(
-				kctx->id,
-				(u64)new_page_count);
+			kbdev,
+			kctx->id,
+			(u64)new_page_count);
 	}
 
 	return 0;
@@ -2146,6 +2361,7 @@
 		size_t nr_pages_to_free)
 {
 	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	bool syncback;
 	bool reclaimed = (alloc->evicted != 0);
 	struct tagged_addr *start_free;
@@ -2224,17 +2440,19 @@
 		int new_page_count;
 
 		kbase_process_page_usage_dec(kctx, freed);
-		new_page_count = kbase_atomic_sub_pages(freed,
-							&kctx->used_pages);
-		kbase_atomic_sub_pages(freed,
-				       &kctx->kbdev->memdev.used_pages);
+		new_page_count = atomic_sub_return(freed,
+			&kctx->used_pages);
+		atomic_sub(freed,
+			&kctx->kbdev->memdev.used_pages);
 
 		KBASE_TLSTREAM_AUX_PAGESALLOC(
+				kbdev,
 				kctx->id,
 				(u64)new_page_count);
 	}
 }
 
+
 void kbase_mem_kref_free(struct kref *kref)
 {
 	struct kbase_mem_phy_alloc *alloc;
@@ -2284,17 +2502,27 @@
 	case KBASE_MEM_TYPE_RAW:
 		/* raw pages, external cleanup */
 		break;
-#ifdef CONFIG_DMA_SHARED_BUFFER
 	case KBASE_MEM_TYPE_IMPORTED_UMM:
+		if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
+			WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1,
+					"WARNING: expected excatly 1 mapping, got %d",
+					alloc->imported.umm.current_mapping_usage_count);
+			dma_buf_unmap_attachment(
+					alloc->imported.umm.dma_attachment,
+					alloc->imported.umm.sgt,
+					DMA_BIDIRECTIONAL);
+		}
 		dma_buf_detach(alloc->imported.umm.dma_buf,
 			       alloc->imported.umm.dma_attachment);
 		dma_buf_put(alloc->imported.umm.dma_buf);
 		break;
-#endif
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
 		if (alloc->imported.user_buf.mm)
 			mmdrop(alloc->imported.user_buf.mm);
-		kfree(alloc->imported.user_buf.pages);
+		if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+			vfree(alloc->imported.user_buf.pages);
+		else
+			kfree(alloc->imported.user_buf.pages);
 		break;
 	default:
 		WARN(1, "Unexecpted free of type %d\n", alloc->type);
@@ -2427,8 +2655,8 @@
 	if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
 		return false;
 
-	/* Secure memory cannot be read by the CPU */
-	if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+	/* Protected memory cannot be read by the CPU */
+	if ((flags & BASE_MEM_PROTECTED) && (flags & BASE_MEM_PROT_CPU_RD))
 		return false;
 
 	return true;
@@ -2704,6 +2932,13 @@
 
 void kbase_jit_debugfs_init(struct kbase_context *kctx)
 {
+	/* Caller already ensures this, but we keep the pattern for
+	 * maintenance safety.
+	 */
+	if (WARN_ON(!kctx) ||
+		WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
+		return;
+
 	/* Debugfs entry for getting the number of JIT allocations. */
 	debugfs_create_file("mem_jit_count", S_IRUGO, kctx->kctx_dentry,
 			kctx, &kbase_jit_debugfs_count_fops);
@@ -2751,7 +2986,7 @@
 		mutex_unlock(&kctx->jit_evict_lock);
 
 		kbase_gpu_vm_lock(kctx);
-		reg->flags &= ~KBASE_REG_JIT;
+		reg->flags &= ~KBASE_REG_NO_USER_FREE;
 		kbase_mem_free_region(kctx, reg);
 		kbase_gpu_vm_unlock(kctx);
 	} while (1);
@@ -2841,13 +3076,13 @@
 	}
 
 	if (pages_required >= (SZ_2M / SZ_4K)) {
-		pool = &kctx->lp_mem_pool;
+		pool = &kctx->mem_pools.large[kctx->jit_group_id];
 		/* Round up to number of 2 MB pages required */
 		pages_required += ((SZ_2M / SZ_4K) - 1);
 		pages_required /= (SZ_2M / SZ_4K);
 	} else {
 #endif
-		pool = &kctx->mem_pool;
+		pool = &kctx->mem_pools.small[kctx->jit_group_id];
 #ifdef CONFIG_MALI_2MB_ALLOC
 	}
 #endif
@@ -2927,6 +3162,31 @@
 	return ret;
 }
 
+static void trace_jit_stats(struct kbase_context *kctx,
+		u32 bin_id, u32 max_allocations)
+{
+	const u32 alloc_count =
+		kctx->jit_current_allocations_per_bin[bin_id];
+	struct kbase_device *kbdev = kctx->kbdev;
+
+	struct kbase_va_region *walker;
+	u32 va_pages = 0;
+	u32 ph_pages = 0;
+
+	mutex_lock(&kctx->jit_evict_lock);
+	list_for_each_entry(walker, &kctx->jit_active_head, jit_node) {
+		if (walker->jit_bin_id != bin_id)
+			continue;
+
+		va_pages += walker->nr_pages;
+		ph_pages += walker->gpu_alloc->nents;
+	}
+	mutex_unlock(&kctx->jit_evict_lock);
+
+	KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id,
+		max_allocations, alloc_count, va_pages, ph_pages);
+}
+
 struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 		struct base_jit_alloc_info *info)
 {
@@ -2934,12 +3194,21 @@
 
 	if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
 		/* Too many current allocations */
+		dev_dbg(kctx->kbdev->dev,
+			"Max JIT allocations limit reached: active allocations %d, max allocations %d\n",
+			kctx->jit_current_allocations,
+			kctx->jit_max_allocations);
 		return NULL;
 	}
 	if (info->max_allocations > 0 &&
 			kctx->jit_current_allocations_per_bin[info->bin_id] >=
 			info->max_allocations) {
 		/* Too many current allocations in this bin */
+		dev_dbg(kctx->kbdev->dev,
+			"Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n",
+			info->bin_id,
+			kctx->jit_current_allocations_per_bin[info->bin_id],
+			info->max_allocations);
 		return NULL;
 	}
 
@@ -2952,11 +3221,9 @@
 	if (info->usage_id != 0) {
 		/* First scan for an allocation with the same usage ID */
 		struct kbase_va_region *walker;
-		struct kbase_va_region *temp;
 		size_t current_diff = SIZE_MAX;
 
-		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
-				jit_node) {
+		list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) {
 
 			if (walker->jit_usage_id == info->usage_id &&
 					walker->jit_bin_id == info->bin_id &&
@@ -2994,11 +3261,9 @@
 		 * use. Search for an allocation we can reuse.
 		 */
 		struct kbase_va_region *walker;
-		struct kbase_va_region *temp;
 		size_t current_diff = SIZE_MAX;
 
-		list_for_each_entry_safe(walker, temp, &kctx->jit_pool_head,
-				jit_node) {
+		list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) {
 
 			if (walker->jit_bin_id == info->bin_id &&
 					meet_size_and_tiler_align_top_requirements(
@@ -3054,13 +3319,17 @@
 			 * better so return the allocation to the pool and
 			 * return the function with failure.
 			 */
+			dev_dbg(kctx->kbdev->dev,
+				"JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n",
+				info->va_pages, info->commit_pages);
 			goto update_failed_unlocked;
 		}
 	} else {
 		/* No suitable JIT allocation was found so create a new one */
 		u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
 				BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
-				BASE_MEM_COHERENT_LOCAL;
+				BASE_MEM_COHERENT_LOCAL |
+				BASEP_MEM_NO_USER_FREE;
 		u64 gpu_addr;
 
 		mutex_unlock(&kctx->jit_evict_lock);
@@ -3068,12 +3337,19 @@
 		if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
 			flags |= BASE_MEM_TILER_ALIGN_TOP;
 
+		flags |= base_mem_group_id_set(kctx->jit_group_id);
+
 		reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
 				info->extent, &flags, &gpu_addr);
-		if (!reg)
+		if (!reg) {
+			/* Most likely not enough GPU virtual space left for
+			 * the new JIT allocation.
+			 */
+			dev_dbg(kctx->kbdev->dev,
+				"Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n",
+				info->va_pages, info->commit_pages);
 			goto out_unlocked;
-
-		reg->flags |= KBASE_REG_JIT;
+		}
 
 		mutex_lock(&kctx->jit_evict_lock);
 		list_add(&reg->jit_node, &kctx->jit_active_head);
@@ -3083,6 +3359,8 @@
 	kctx->jit_current_allocations++;
 	kctx->jit_current_allocations_per_bin[info->bin_id]++;
 
+	trace_jit_stats(kctx, info->bin_id, info->max_allocations);
+
 	reg->jit_usage_id = info->usage_id;
 	reg->jit_bin_id = info->bin_id;
 
@@ -3126,6 +3404,8 @@
 	kctx->jit_current_allocations--;
 	kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
 
+	trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX);
+
 	kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
 
 	kbase_gpu_vm_lock(kctx);
@@ -3191,7 +3471,7 @@
 	mutex_unlock(&kctx->jit_evict_lock);
 
 	if (reg) {
-		reg->flags &= ~KBASE_REG_JIT;
+		reg->flags &= ~KBASE_REG_NO_USER_FREE;
 		kbase_mem_free_region(kctx, reg);
 	}
 
@@ -3213,7 +3493,7 @@
 		list_del(&walker->jit_node);
 		list_del_init(&walker->gpu_alloc->evict_node);
 		mutex_unlock(&kctx->jit_evict_lock);
-		walker->flags &= ~KBASE_REG_JIT;
+		walker->flags &= ~KBASE_REG_NO_USER_FREE;
 		kbase_mem_free_region(kctx, walker);
 		mutex_lock(&kctx->jit_evict_lock);
 	}
@@ -3225,7 +3505,7 @@
 		list_del(&walker->jit_node);
 		list_del_init(&walker->gpu_alloc->evict_node);
 		mutex_unlock(&kctx->jit_evict_lock);
-		walker->flags &= ~KBASE_REG_JIT;
+		walker->flags &= ~KBASE_REG_NO_USER_FREE;
 		kbase_mem_free_region(kctx, walker);
 		mutex_lock(&kctx->jit_evict_lock);
 	}
@@ -3239,37 +3519,53 @@
 	cancel_work_sync(&kctx->jit_work);
 }
 
-static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+bool kbase_has_exec_va_zone(struct kbase_context *kctx)
+{
+	bool has_exec_va_zone;
+
+	kbase_gpu_vm_lock(kctx);
+	has_exec_va_zone = (kctx->exec_va_start != U64_MAX);
+	kbase_gpu_vm_unlock(kctx);
+
+	return has_exec_va_zone;
+}
+
+
+int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
 		struct kbase_va_region *reg)
 {
+	struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+	struct page **pages = alloc->imported.user_buf.pages;
+	unsigned long address = alloc->imported.user_buf.address;
+	struct mm_struct *mm = alloc->imported.user_buf.mm;
 	long pinned_pages;
-	struct kbase_mem_phy_alloc *alloc;
-	struct page **pages;
-	struct tagged_addr *pa;
 	long i;
-	int err = -ENOMEM;
-	unsigned long address;
-	struct mm_struct *mm;
-	struct device *dev;
-	unsigned long offset;
-	unsigned long local_size;
-	unsigned long gwt_mask = ~0;
 
-	alloc = reg->gpu_alloc;
-	pa = kbase_get_gpu_phy_pages(reg);
-	address = alloc->imported.user_buf.address;
-	mm = alloc->imported.user_buf.mm;
+	if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF))
+		return -EINVAL;
 
-	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	if (alloc->nents) {
+		if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages))
+			return -EINVAL;
+		else
+			return 0;
+	}
 
-	pages = alloc->imported.user_buf.pages;
+	if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm))
+		return -EINVAL;
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
 	pinned_pages = get_user_pages(NULL, mm,
 			address,
 			alloc->imported.user_buf.nr_pages,
+#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
+KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
 			reg->flags & KBASE_REG_GPU_WR,
 			0, pages, NULL);
+#endif
 #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
 	pinned_pages = get_user_pages_remote(NULL, mm,
 			address,
@@ -3299,6 +3595,34 @@
 		return -ENOMEM;
 	}
 
+	alloc->nents = pinned_pages;
+
+	return 0;
+}
+
+static int kbase_jd_user_buf_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	long pinned_pages;
+	struct kbase_mem_phy_alloc *alloc;
+	struct page **pages;
+	struct tagged_addr *pa;
+	long i;
+	unsigned long address;
+	struct device *dev;
+	unsigned long offset;
+	unsigned long local_size;
+	unsigned long gwt_mask = ~0;
+	int err = kbase_jd_user_buf_pin_pages(kctx, reg);
+
+	if (err)
+		return err;
+
+	alloc = reg->gpu_alloc;
+	pa = kbase_get_gpu_phy_pages(reg);
+	address = alloc->imported.user_buf.address;
+	pinned_pages = alloc->nents;
+	pages = alloc->imported.user_buf.pages;
 	dev = kctx->kbdev->dev;
 	offset = address & ~PAGE_MASK;
 	local_size = alloc->imported.user_buf.size;
@@ -3321,21 +3645,21 @@
 		offset = 0;
 	}
 
-	alloc->nents = pinned_pages;
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 	if (kctx->gwt_enabled)
 		gwt_mask = ~KBASE_REG_GPU_WR;
 #endif
 
 	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
 			pa, kbase_reg_current_backed_size(reg),
-			reg->flags & gwt_mask, kctx->as_nr);
+			reg->flags & gwt_mask, kctx->as_nr,
+			alloc->group_id);
 	if (err == 0)
 		return 0;
 
-	alloc->nents = 0;
 	/* fall down */
 unwind:
+	alloc->nents = 0;
 	while (i--) {
 		dma_unmap_page(kctx->kbdev->dev,
 				alloc->imported.user_buf.dma_addrs[i],
@@ -3350,6 +3674,10 @@
 	return err;
 }
 
+/* On Job Manager GPUs this function also performs the work of unpinning pages,
+ * which implies that a call to kbase_jd_user_buf_pin_pages() will NOT have a
+ * corresponding call to kbase_jd_user_buf_unpin_pages().
+ */
 static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
 		struct kbase_mem_phy_alloc *alloc, bool writeable)
 {
@@ -3376,128 +3704,19 @@
 	alloc->nents = 0;
 }
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
-static int kbase_jd_umm_map(struct kbase_context *kctx,
-		struct kbase_va_region *reg)
-{
-	struct sg_table *sgt;
-	struct scatterlist *s;
-	int i;
-	struct tagged_addr *pa;
-	int err;
-	size_t count = 0;
-	struct kbase_mem_phy_alloc *alloc;
-	unsigned long gwt_mask = ~0;
-
-	alloc = reg->gpu_alloc;
-
-	KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
-	KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
-	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
-			DMA_BIDIRECTIONAL);
-
-	if (IS_ERR_OR_NULL(sgt))
-		return -EINVAL;
-
-	/* save for later */
-	alloc->imported.umm.sgt = sgt;
-
-	pa = kbase_get_gpu_phy_pages(reg);
-	KBASE_DEBUG_ASSERT(pa);
-
-	for_each_sg(sgt->sgl, s, sgt->nents, i) {
-		size_t j, pages = PFN_UP(sg_dma_len(s));
-
-		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
-		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
-		sg_dma_len(s));
-
-		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
-		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
-		(unsigned long long) sg_dma_address(s));
-
-		for (j = 0; (j < pages) && (count < reg->nr_pages); j++,
-				count++)
-			*pa++ = as_tagged(sg_dma_address(s) +
-				(j << PAGE_SHIFT));
-		WARN_ONCE(j < pages,
-			  "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
-		alloc->imported.umm.dma_buf->size);
-	}
-
-	if (!(reg->flags & KBASE_REG_IMPORT_PAD) &&
-			WARN_ONCE(count < reg->nr_pages,
-			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
-			alloc->imported.umm.dma_buf->size)) {
-		err = -EINVAL;
-		goto err_unmap_attachment;
-	}
-
-	/* Update nents as we now have pages to map */
-	alloc->nents = reg->nr_pages;
-
-#ifdef CONFIG_MALI_JOB_DUMP
-	if (kctx->gwt_enabled)
-		gwt_mask = ~KBASE_REG_GPU_WR;
-#endif
-
-	err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
-			kbase_get_gpu_phy_pages(reg),
-			count,
-			(reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD) &
-			 gwt_mask,
-			kctx->as_nr);
-	if (err)
-		goto err_unmap_attachment;
-
-	if (reg->flags & KBASE_REG_IMPORT_PAD) {
-		err = kbase_mmu_insert_single_page(kctx,
-				reg->start_pfn + count,
-				kctx->aliasing_sink_page,
-				reg->nr_pages - count,
-				(reg->flags | KBASE_REG_GPU_RD) &
-				~KBASE_REG_GPU_WR);
-		if (err)
-			goto err_teardown_orig_pages;
-	}
-
-	return 0;
-
-err_teardown_orig_pages:
-	kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
-			count, kctx->as_nr);
-err_unmap_attachment:
-	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
-			alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
-	alloc->imported.umm.sgt = NULL;
-
-	return err;
-}
-
-static void kbase_jd_umm_unmap(struct kbase_context *kctx,
-		struct kbase_mem_phy_alloc *alloc)
-{
-	KBASE_DEBUG_ASSERT(kctx);
-	KBASE_DEBUG_ASSERT(alloc);
-	KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
-	KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
-	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
-	    alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
-	alloc->imported.umm.sgt = NULL;
-	alloc->nents = 0;
-}
-#endif				/* CONFIG_DMA_SHARED_BUFFER */
-
 struct kbase_mem_phy_alloc *kbase_map_external_resource(
 		struct kbase_context *kctx, struct kbase_va_region *reg,
 		struct mm_struct *locked_mm)
 {
 	int err;
 
+	lockdep_assert_held(&kctx->reg_lock);
+
 	/* decide what needs to happen for this resource */
 	switch (reg->gpu_alloc->type) {
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
-		if (reg->gpu_alloc->imported.user_buf.mm != locked_mm)
+		if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) &&
+		    (!reg->gpu_alloc->nents))
 			goto exit;
 
 		reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
@@ -3510,19 +3729,12 @@
 		}
 	}
 	break;
-#ifdef CONFIG_DMA_SHARED_BUFFER
 	case KBASE_MEM_TYPE_IMPORTED_UMM: {
-		reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
-		if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
-			err = kbase_jd_umm_map(kctx, reg);
-			if (err) {
-				reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
-				goto exit;
-			}
-		}
+		err = kbase_mem_umm_map(kctx, reg);
+		if (err)
+			goto exit;
 		break;
 	}
-#endif
 	default:
 		goto exit;
 	}
@@ -3536,35 +3748,18 @@
 		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
 {
 	switch (alloc->type) {
-#ifdef CONFIG_DMA_SHARED_BUFFER
 	case KBASE_MEM_TYPE_IMPORTED_UMM: {
-		alloc->imported.umm.current_mapping_usage_count--;
-
-		if (0 == alloc->imported.umm.current_mapping_usage_count) {
-			if (reg && reg->gpu_alloc == alloc) {
-				int err;
-
-				err = kbase_mmu_teardown_pages(
-						kctx->kbdev,
-						&kctx->mmu,
-						reg->start_pfn,
-						alloc->nents,
-						kctx->as_nr);
-				WARN_ON(err);
-			}
-
-			kbase_jd_umm_unmap(kctx, alloc);
-		}
+		kbase_mem_umm_unmap(kctx, reg, alloc);
 	}
 	break;
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 	case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
 		alloc->imported.user_buf.current_mapping_usage_count--;
 
 		if (0 == alloc->imported.user_buf.current_mapping_usage_count) {
 			bool writeable = true;
 
-			if (reg && reg->gpu_alloc == alloc)
+			if (!kbase_is_region_invalid_or_free(reg) &&
+					reg->gpu_alloc == alloc)
 				kbase_mmu_teardown_pages(
 						kctx->kbdev,
 						&kctx->mmu,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
index 6e38aa0..bebf55f 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,9 +39,6 @@
 #include <mali_kbase_hw.h>
 #include "mali_kbase_pm.h"
 #include "mali_kbase_defs.h"
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-#include "mali_kbase_gator.h"
-#endif
 /* Required for kbase_mem_evictable_unmake */
 #include "mali_kbase_mem_linux.h"
 
@@ -94,54 +91,61 @@
 /**
  * @brief Physical pages tracking object properties
   */
-#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1ul << 0)
-#define KBASE_MEM_PHY_ALLOC_LARGE            (1ul << 1)
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED  (1u << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE            (1u << 1)
 
-/* physical pages tracking object.
+/* struct kbase_mem_phy_alloc - Physical pages tracking object.
+ *
  * Set up to track N pages.
  * N not stored here, the creator holds that info.
  * This object only tracks how many elements are actually valid (present).
- * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
- * shared with another region or client. CPU mappings are OK to exist when changing, as
- * long as the tracked mappings objects are updated as part of the change.
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc
+ * is not shared with another region or client. CPU mappings are OK to
+ * exist when changing, as long as the tracked mappings objects are
+ * updated as part of the change.
+ *
+ * @kref: number of users of this alloc
+ * @gpu_mappings: count number of times mapped on the GPU
+ * @nents: 0..N
+ * @pages: N elements, only 0..nents are valid
+ * @mappings: List of CPU mappings of this physical memory allocation.
+ * @evict_node: Node used to store this allocation on the eviction list
+ * @evicted: Physical backing size when the pages were evicted
+ * @reg: Back reference to the region structure which created this
+ *       allocation, or NULL if it has been freed.
+ * @type: type of buffer
+ * @permanent_map: Kernel side mapping of the alloc, which shall never be
+ *                 referenced directly. The kbase_phy_alloc_mapping_get() &
+ *                 kbase_phy_alloc_mapping_put() pair should be used
+ *                 around access to the kernel-side CPU mapping so that the
+ *                 mapping doesn't disappear whilst it is being accessed.
+ * @properties: Bitmask of properties, e.g. KBASE_MEM_PHY_ALLOC_LARGE.
+ * @group_id: A memory group ID to be passed to a platform-specific
+ *            memory group manager, if present.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @imported: member in union valid based on @type
  */
 struct kbase_mem_phy_alloc {
-	struct kref           kref; /* number of users of this alloc */
+	struct kref           kref;
 	atomic_t              gpu_mappings;
-	size_t                nents; /* 0..N */
-	struct tagged_addr    *pages; /* N elements, only 0..nents are valid */
-
-	/* kbase_cpu_mappings */
+	size_t                nents;
+	struct tagged_addr    *pages;
 	struct list_head      mappings;
-
-	/* Node used to store this allocation on the eviction list */
 	struct list_head      evict_node;
-	/* Physical backing size when the pages where evicted */
 	size_t                evicted;
-	/*
-	 * Back reference to the region structure which created this
-	 * allocation, or NULL if it has been freed.
-	 */
 	struct kbase_va_region *reg;
-
-	/* type of buffer */
 	enum kbase_memory_type type;
-
-	/* Kernel side mapping of the alloc */
 	struct kbase_vmap_struct *permanent_map;
+	u8 properties;
+	u8 group_id;
 
-	unsigned long properties;
-
-	/* member in union valid based on @a type */
 	union {
-#if defined(CONFIG_DMA_SHARED_BUFFER)
 		struct {
 			struct dma_buf *dma_buf;
 			struct dma_buf_attachment *dma_attachment;
 			unsigned int current_mapping_usage_count;
 			struct sg_table *sgt;
 		} umm;
-#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
 		struct {
 			u64 stride;
 			size_t nents;
@@ -286,7 +290,7 @@
 #define KBASE_REG_MEMATTR_INDEX(x)  (((x) & 7) << 16)
 #define KBASE_REG_MEMATTR_VALUE(x)  (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
 
-#define KBASE_REG_SECURE            (1ul << 19)
+#define KBASE_REG_PROTECTED         (1ul << 19)
 
 #define KBASE_REG_DONT_NEED         (1ul << 20)
 
@@ -302,8 +306,10 @@
  * Extent must be a power of 2 */
 #define KBASE_REG_TILER_ALIGN_TOP   (1ul << 23)
 
-/* Memory is handled by JIT - user space should not be able to free it */
-#define KBASE_REG_JIT               (1ul << 24)
+/* Whilst this flag is set the GPU allocation is not supposed to be freed by
+ * user space. The flag will remain set for the lifetime of JIT allocations.
+ */
+#define KBASE_REG_NO_USER_FREE      (1ul << 24)
 
 /* Memory has permanent kernel side mapping */
 #define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
@@ -341,6 +347,13 @@
 #define KBASE_REG_ZONE_CUSTOM_VA_SIZE    (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
 /* end 32-bit clients only */
 
+/* The starting address and size of the GPU-executable zone are dynamic
+ * and depend on the platform and the number of pages requested by the
+ * user process, with an upper limit of 4 GB.
+ */
+#define KBASE_REG_ZONE_EXEC_VA           KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */
+
 
 	unsigned long flags;
 
@@ -459,7 +472,7 @@
 
 static inline struct kbase_mem_phy_alloc *kbase_alloc_create(
 		struct kbase_context *kctx, size_t nr_pages,
-		enum kbase_memory_type type)
+		enum kbase_memory_type type, int group_id)
 {
 	struct kbase_mem_phy_alloc *alloc;
 	size_t alloc_size = sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
@@ -506,6 +519,7 @@
 	alloc->pages = (void *)(alloc + 1);
 	INIT_LIST_HEAD(&alloc->mappings);
 	alloc->type = type;
+	alloc->group_id = group_id;
 
 	if (type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
 		alloc->imported.user_buf.dma_addrs =
@@ -515,7 +529,7 @@
 }
 
 static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
-		struct kbase_context *kctx)
+		struct kbase_context *kctx, int group_id)
 {
 	KBASE_DEBUG_ASSERT(reg);
 	KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
@@ -523,7 +537,7 @@
 	KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
 
 	reg->cpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
-			KBASE_MEM_TYPE_NATIVE);
+			KBASE_MEM_TYPE_NATIVE, group_id);
 	if (IS_ERR(reg->cpu_alloc))
 		return PTR_ERR(reg->cpu_alloc);
 	else if (!reg->cpu_alloc)
@@ -533,7 +547,7 @@
 	if (kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE)
 	    && (reg->flags & KBASE_REG_CPU_CACHED)) {
 		reg->gpu_alloc = kbase_alloc_create(kctx, reg->nr_pages,
-				KBASE_MEM_TYPE_NATIVE);
+				KBASE_MEM_TYPE_NATIVE, group_id);
 		if (IS_ERR_OR_NULL(reg->gpu_alloc)) {
 			kbase_mem_phy_alloc_put(reg->cpu_alloc);
 			return -ENOMEM;
@@ -553,24 +567,6 @@
 	return 0;
 }
 
-static inline u32 kbase_atomic_add_pages(u32 num_pages, atomic_t *used_pages)
-{
-	int new_val = atomic_add_return(num_pages, used_pages);
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
-#endif
-	return new_val;
-}
-
-static inline u32 kbase_atomic_sub_pages(u32 num_pages, atomic_t *used_pages)
-{
-	int new_val = atomic_sub_return(num_pages, used_pages);
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-	kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
-#endif
-	return new_val;
-}
-
 /*
  * Max size for kbdev memory pool (in pages)
  */
@@ -592,10 +588,42 @@
 #define KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER	0
 
 /**
+ * kbase_mem_pool_config_set_max_size - Set maximum number of free pages in
+ *                                      initial configuration of a memory pool
+ *
+ * @config:   Initial configuration for a physical memory pool
+ * @max_size: Maximum number of free pages that a pool created from
+ *            @config can hold
+ */
+static inline void kbase_mem_pool_config_set_max_size(
+	struct kbase_mem_pool_config *const config, size_t const max_size)
+{
+	WRITE_ONCE(config->max_size, max_size);
+}
+
+/**
+ * kbase_mem_pool_config_get_max_size - Get maximum number of free pages from
+ *                                      initial configuration of a memory pool
+ *
+ * @config: Initial configuration for a physical memory pool
+ *
+ * Return: Maximum number of free pages that a pool created from @config
+ *         can hold
+ */
+static inline size_t kbase_mem_pool_config_get_max_size(
+	const struct kbase_mem_pool_config *const config)
+{
+	return READ_ONCE(config->max_size);
+}
+
+/**
  * kbase_mem_pool_init - Create a memory pool for a kbase device
  * @pool:      Memory pool to initialize
- * @max_size:  Maximum number of free pages the pool can hold
+ * @config:    Initial configuration for the memory pool
  * @order:     Page order for physical page size (order=0=>4kB, order=9=>2MB)
+ * @group_id:  A memory group ID to be passed to a platform-specific
+ *             memory group manager, if present.
+ *             Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
  * @kbdev:     Kbase device where memory is used
  * @next_pool: Pointer to the next pool or NULL.
  *
@@ -606,7 +634,7 @@
  * certain corner cases grow above @max_size.
  *
  * If @next_pool is not NULL, we will allocate from @next_pool before going to
- * the kernel allocator. Similarily pages can spill over to @next_pool when
+ * the memory group manager. Similarly pages can spill over to @next_pool when
  * @pool is full. Pages are zeroed before they spill over to another pool, to
  * prevent leaking information between applications.
  *
@@ -616,8 +644,9 @@
  * Return: 0 on success, negative -errno on error
  */
 int kbase_mem_pool_init(struct kbase_mem_pool *pool,
-		size_t max_size,
-		size_t order,
+		const struct kbase_mem_pool_config *config,
+		unsigned int order,
+		int group_id,
 		struct kbase_device *kbdev,
 		struct kbase_mem_pool *next_pool);
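
As a usage illustration of the config helpers and the new kbase_mem_pool_init() signature above, the sketch below creates a standalone small-page pool capped at 512 free pages for memory group 0. It assumes the declarations in this header are in scope; the function name and values are illustrative, not code from this patch.

static int example_small_pool_init(struct kbase_device *kbdev,
		struct kbase_mem_pool *pool)
{
	struct kbase_mem_pool_config config;

	/* Cap the pool at 512 free 4 kB pages. */
	kbase_mem_pool_config_set_max_size(&config, 512);

	return kbase_mem_pool_init(pool, &config,
			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
			0 /* group_id */, kbdev, NULL /* no next_pool */);
}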
 
@@ -695,7 +724,7 @@
 /**
  * kbase_mem_pool_alloc_pages - Allocate pages from memory pool
  * @pool:     Memory pool to allocate from
- * @nr_pages: Number of pages to allocate
+ * @nr_4k_pages: Number of 4 KiB pages to allocate
  * @pages:    Pointer to array where the physical address of the allocated
  *            pages will be stored.
  * @partial_allowed: If fewer pages allocated is allowed
@@ -714,7 +743,7 @@
  * the kernel OoM killer runs. If the caller must allocate pages while holding
  * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
  */
-int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_pages,
+int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 		struct tagged_addr *pages, bool partial_allowed);
 
 /**
@@ -866,9 +895,42 @@
  */
 struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool);
 
+/**
+ * kbase_region_tracker_init - Initialize the region tracker data structure
+ * @kctx: kbase context
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
 int kbase_region_tracker_init(struct kbase_context *kctx);
+
+/**
+ * kbase_region_tracker_init_jit - Initialize the JIT region
+ * @kctx: kbase context
+ * @jit_va_pages: Size of the JIT region in pages
+ * @max_allocations: Maximum number of allocations allowed for the JIT region
+ * @trim_level: Trim level for the JIT region
+ * @group_id: The physical group ID from which to allocate JIT memory.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
 int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
-		u8 max_allocations, u8 trim_level);
+		u8 max_allocations, u8 trim_level, int group_id);
+
+/**
+ * kbase_region_tracker_init_exec - Initialize the EXEC_VA region
+ * @kctx: kbase context
+ * @exec_va_pages: Size of the EXEC_VA zone in pages.
+ *                 It must not be greater than 4 GB.
+ *
+ * Return: 0 if success, negative error code otherwise.
+ */
+int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages);
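
To illustrate how the two initializers above interact: kbase_region_tracker_init_exec() rejects a context that already has a JIT_VA zone, so the EXEC_VA zone must be created before the JIT zone. The sketch below is illustrative only; the sizes and group_id are arbitrary, and the real callers are the ioctl handlers, which are outside this hunk.

static int example_setup_optional_zones(struct kbase_context *kctx)
{
	int err;

	/* 64 MiB executable zone; must be non-zero and no larger than
	 * KBASE_REG_ZONE_EXEC_VA_MAX_PAGES. Created before the JIT zone,
	 * since kbase_region_tracker_init_exec() fails with -EPERM once
	 * jit_va is set.
	 */
	err = kbase_region_tracker_init_exec(kctx, SZ_64M >> PAGE_SHIFT);
	if (err)
		return err;

	/* 512 MiB of JIT VA, at most 255 allocations, no trimming,
	 * backed by physical memory group 0.
	 */
	return kbase_region_tracker_init_jit(kctx, SZ_512M >> PAGE_SHIFT,
			255, 0, 0);
}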
+
+/**
+ * kbase_region_tracker_term - Terminate the region tracker
+ * @kctx: kbase context
+ */
 void kbase_region_tracker_term(struct kbase_context *kctx);
 
 /**
@@ -951,42 +1013,66 @@
  *
  * The structure should be terminated using kbase_mmu_term()
  *
- * @kbdev: kbase device
- * @mmut:  structure to initialise
- * @kctx:  optional kbase context, may be NULL if this set of MMU tables is not
- *         associated with a context
+ * @kbdev:    Instance of GPU platform device, allocated from the probe method.
+ * @mmut:     GPU page tables to be initialized.
+ * @kctx:     Optional kbase context, may be NULL if this set of MMU tables
+ *            is not associated with a context.
+ * @group_id: The physical group ID from which to allocate GPU page tables.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return:    0 if successful, otherwise a negative error code.
  */
 int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
-		struct kbase_context *kctx);
+		struct kbase_context *kctx, int group_id);
 /**
  * kbase_mmu_term - Terminate an object representing GPU page tables
  *
  * This will free any page tables that have been allocated
  *
- * @kbdev: kbase device
- * @mmut:  kbase_mmu_table to be destroyed
+ * @kbdev: Instance of GPU platform device, allocated from the probe method.
+ * @mmut:  GPU page tables to be destroyed.
  */
 void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut);
 
+/**
+ * kbase_mmu_create_ate - Create an address translation entry
+ *
+ * @kbdev:    Instance of GPU platform device, allocated from the probe method.
+ * @phy:      Physical address of the page to be mapped for GPU access.
+ * @flags:    Bitmask of attributes of the GPU memory region being mapped.
+ * @level:    Page table level for which to build an address translation entry.
+ * @group_id: The physical memory group in which the page was allocated.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * This function creates an address translation entry to encode the physical
+ * address of a page to be mapped for access by the GPU, along with any extra
+ * attributes required for the GPU memory region.
+ *
+ * Return: An address translation entry, either in LPAE or AArch64 format
+ *         (depending on the driver's configuration).
+ */
+u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
+	struct tagged_addr phy, unsigned long flags, int level, int group_id);
+
 int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
 				    struct kbase_mmu_table *mmut,
 				    const u64 start_vpfn,
 				    struct tagged_addr *phys, size_t nr,
-				    unsigned long flags);
+				    unsigned long flags, int group_id);
 int kbase_mmu_insert_pages(struct kbase_device *kbdev,
 			   struct kbase_mmu_table *mmut, u64 vpfn,
 			   struct tagged_addr *phys, size_t nr,
-			   unsigned long flags, int as_nr);
+			   unsigned long flags, int as_nr, int group_id);
 int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 					struct tagged_addr phys, size_t nr,
-					unsigned long flags);
+					unsigned long flags, int group_id);
 
 int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
 			     struct kbase_mmu_table *mmut, u64 vpfn,
 			     size_t nr, int as_nr);
 int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
 			   struct tagged_addr *phys, size_t nr,
-			   unsigned long flags);
+			   unsigned long flags, int const group_id);
 
 /**
  * @brief Register region and map it on the GPU.
@@ -1253,6 +1339,8 @@
 *
 * @param[in] alloc allocation object to free pages from
 * @param[in] nr_pages_to_free number of physical pages to free
+*
+* Return: 0 on success, otherwise a negative error code
 */
 int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
 
@@ -1305,17 +1393,18 @@
 }
 
 /**
-* @brief Process a bus or page fault.
-*
-* This function will process a fault on a specific address space
-*
-* @param[in] kbdev   The @ref kbase_device the fault happened on
-* @param[in] kctx    The @ref kbase_context for the faulting address space if
-*                    one was found.
-* @param[in] as      The address space that has the fault
-*/
+ * kbase_mmu_interrupt_process - Process a bus or page fault.
+ * @kbdev:  The kbase_device the fault happened on
+ * @kctx:   The kbase_context for the faulting address space if one was found.
+ * @as:     The address space that has the fault
+ * @fault:  Data relating to the fault
+ *
+ * This function will process a fault on a specific address space.
+ */
 void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
-		struct kbase_context *kctx, struct kbase_as *as);
+		struct kbase_context *kctx, struct kbase_as *as,
+		struct kbase_fault *fault);
+
 
 /**
  * @brief Process a page fault.
@@ -1422,6 +1511,18 @@
 void kbase_jit_term(struct kbase_context *kctx);
 
 /**
+ * kbase_has_exec_va_zone - EXEC_VA zone predicate
+ *
+ * Determine whether an EXEC_VA zone has been created for the GPU address space
+ * of the given kbase context.
+ *
+ * @kctx: kbase context
+ *
+ * Return: True if the kbase context has an EXEC_VA zone.
+ */
+bool kbase_has_exec_va_zone(struct kbase_context *kctx);
+
+/**
  * kbase_map_external_resource - Map an external resource to the GPU.
  * @kctx:              kbase context.
  * @reg:               The region to map.
@@ -1443,6 +1544,21 @@
 void kbase_unmap_external_resource(struct kbase_context *kctx,
 		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
 
+
+/**
+ * kbase_jd_user_buf_pin_pages - Pin the pages of a user buffer.
+ * @kctx: kbase context.
+ * @reg:  The region associated with the imported user buffer.
+ *
+ * To successfully pin the pages for a user buffer the current mm_struct must
+ * be the same as the mm_struct of the user buffer. After successfully pinning
+ * the pages further calls to this function succeed without doing work.
+ *
+ * Return: zero on success or negative number on failure.
+ */
+int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
+		struct kbase_va_region *reg);
+
 /**
  * kbase_sticky_resource_init - Initialize sticky resource management.
  * @kctx: kbase context
@@ -1508,4 +1624,52 @@
 void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc);
 
 
+/**
+ * kbase_mem_umm_map - Map dma-buf
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region of the imported dma-buf to map
+ *
+ * Map a dma-buf on the GPU. The mappings are reference counted.
+ *
+ * Return: 0 on success, or a negative error code.
+ */
+int kbase_mem_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg);
+
+/**
+ * kbase_mem_umm_unmap - Unmap dma-buf
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region of the imported dma-buf to unmap
+ * @alloc: Pointer to the alloc to release
+ *
+ * Unmap a dma-buf from the GPU. The mappings are reference counted.
+ *
+ * @reg must be the original region with GPU mapping of @alloc; or NULL. If
+ * @reg is NULL, or doesn't match @alloc, the GPU page table entries matching
+ * @reg will not be updated.
+ *
+ * @alloc must be a valid physical allocation of type
+ * KBASE_MEM_TYPE_IMPORTED_UMM that was previously mapped by
+ * kbase_mem_umm_map(). The dma-buf attachment referenced by @alloc will
+ * release its mapping reference, and if the refcount reaches 0, it will also
+ * be unmapped, regardless of the value of @reg.
+ */
+void kbase_mem_umm_unmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc);
+
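For illustration only, not part of the patch: a hypothetical caller pairing the reference-counted dma-buf helpers declared above. The function name is invented, and holding kctx->reg_lock is assumed (kbase_mem_umm_map() asserts it later in this patch).

/* Hypothetical sketch: take and drop one dma-buf mapping reference. */
static int example_use_umm_mapping(struct kbase_context *kctx,
		struct kbase_va_region *reg)
{
	int err = kbase_mem_umm_map(kctx, reg); /* takes a mapping reference */

	if (err)
		return err;

	/* ... submit GPU work that accesses the imported buffer ... */

	/* drops the reference; the dma-buf is unmapped when it reaches zero */
	kbase_mem_umm_unmap(kctx, reg, reg->gpu_alloc);
	return 0;
}
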
+/**
+ * kbase_mem_do_sync_imported - Sync caches for imported memory
+ * @kctx: Pointer to the kbase context
+ * @reg: Pointer to the region with imported memory to sync
+ * @sync_fn: The type of sync operation to perform
+ *
+ * Sync CPU caches for supported (currently only dma-buf (UMM)) memory.
+ * Attempting to sync an unsupported imported memory type returns -EINVAL.
+ *
+ * Return: 0 on success, or a negative error code.
+ */
+int kbase_mem_do_sync_imported(struct kbase_context *kctx,
+		struct kbase_va_region *reg, enum kbase_sync_type sync_fn);
+
 #endif				/* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
index f08b0c0..50a74ad 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,17 +39,51 @@
 	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
 #include <linux/dma-attrs.h>
 #endif /* LINUX_VERSION_CODE >= 3.5.0 && < 4.8.0 */
-#ifdef CONFIG_DMA_SHARED_BUFFER
 #include <linux/dma-buf.h>
-#endif				/* defined(CONFIG_DMA_SHARED_BUFFER) */
 #include <linux/shrinker.h>
 #include <linux/cache.h>
+#include <linux/memory_group_manager.h>
 
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 #include <mali_kbase_ioctl.h>
 
+#if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \
+	(KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE))
+/* Enable workaround for ion for kernels prior to v5.0.0 and from v5.3.0
+ * onwards.
+ *
+ * For kernels prior to v4.12, the workaround is needed as ion lacks the cache
+ * maintenance in its begin_cpu_access and end_cpu_access methods.
+ *
+ * For kernels prior to v4.17.2, the workaround is needed to avoid the
+ * potentially disruptive warnings which can occur if the begin_cpu_access and
+ * end_cpu_access methods are not called in pairs.
+ * Note that some long term maintenance kernel versions (e.g. 4.9.x, 4.14.x)
+ * only require this workaround on their earlier releases. However it is still
+ * safe to use it on such releases, and it simplifies the version check.
+ *
+ * For kernels later than v4.17.2, the workaround is needed as ion can
+ * potentially end up calling dma_sync_sg_for_* for a dma-buf importer that
+ * hasn't mapped the attachment. This would result in a kernel panic, as ion
+ * populates the dma_address when the attachment is mapped and the kernel
+ * derives the physical address for cache maintenance from the dma_address.
+ * With some multi-threaded tests it has been seen that the same dma-buf
+ * memory gets imported twice on the Mali DDK side, so the problem of syncing
+ * with an importer that has an unmapped attachment arises at the time of the
+ * second import. The same problem can occur if there is another importer of
+ * the dma-buf memory.
+ *
+ * The workaround can be safely disabled for kernels between v5.0.0 and
+ * v5.2.2, as none of the above issues are present in them.
+ *
+ * As a workaround, dma_sync_sg_for_* calls will be made directly using
+ * Kbase's previously mapped attachment to the dma-buf.
+ */
+#define KBASE_MEM_ION_SYNC_WORKAROUND
+#endif
+
 
 static int kbase_vmap_phy_pages(struct kbase_context *kctx,
 		struct kbase_va_region *reg, u64 offset_bytes, size_t size,
@@ -121,11 +155,11 @@
 		return -EINVAL;
 
 	if (size > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
-			kctx->permanent_mapped_pages)) {
-		dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %lu pages",
+			atomic_read(&kctx->permanent_mapped_pages))) {
+		dev_warn(kctx->kbdev->dev, "Request for %llu more pages mem needing a permanent mapping would breach limit %lu, currently at %d pages",
 				(u64)size,
 				KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES,
-				kctx->permanent_mapped_pages);
+				atomic_read(&kctx->permanent_mapped_pages));
 		return -ENOMEM;
 	}
 
@@ -141,7 +175,7 @@
 	reg->flags &= ~KBASE_REG_GROWABLE;
 
 	reg->cpu_alloc->permanent_map = kern_mapping;
-	kctx->permanent_mapped_pages += size;
+	atomic_add(size, &kctx->permanent_mapped_pages);
 
 	return 0;
 vmap_fail:
@@ -162,8 +196,8 @@
 	 * this being reduced a second time if a separate gpu_alloc is
 	 * freed
 	 */
-	WARN_ON(alloc->nents > kctx->permanent_mapped_pages);
-	kctx->permanent_mapped_pages -= alloc->nents;
+	WARN_ON(alloc->nents > atomic_read(&kctx->permanent_mapped_pages));
+	atomic_sub(alloc->nents, &kctx->permanent_mapped_pages);
 }
 
 void *kbase_phy_alloc_mapping_get(struct kbase_context *kctx,
@@ -241,6 +275,9 @@
 	KBASE_DEBUG_ASSERT(gpu_va);
 
 	dev = kctx->kbdev->dev;
+	dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n",
+		va_pages, commit_pages, extent, *flags);
+
 	*gpu_va = 0; /* return 0 on failure */
 
 	if (!kbase_check_alloc_flags(*flags)) {
@@ -250,6 +287,16 @@
 		goto bad_flags;
 	}
 
+#ifdef CONFIG_DEBUG_FS
+	if (unlikely(kbase_ctx_flag(kctx, KCTX_INFINITE_CACHE))) {
+		/* Mask coherency flags if infinite cache is enabled to prevent
+		 * the skipping of syncs from BASE side.
+		 */
+		*flags &= ~(BASE_MEM_COHERENT_SYSTEM_REQUIRED |
+			    BASE_MEM_COHERENT_SYSTEM);
+	}
+#endif
+
 	if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
 			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
 		/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
@@ -269,10 +316,19 @@
 	if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent))
 		goto bad_sizes;
 
+#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
+	/* Ensure that memory is fully physically-backed. */
+	if (*flags & BASE_MEM_GROW_ON_GPF)
+		commit_pages = va_pages;
+#endif
+
 	/* find out which VA zone to use */
 	if (*flags & BASE_MEM_SAME_VA) {
 		rbtree = &kctx->reg_rbtree_same;
 		zone = KBASE_REG_ZONE_SAME_VA;
+	} else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
+		rbtree = &kctx->reg_rbtree_exec;
+		zone = KBASE_REG_ZONE_EXEC_VA;
 	} else {
 		rbtree = &kctx->reg_rbtree_custom;
 		zone = KBASE_REG_ZONE_CUSTOM_VA;
@@ -287,7 +343,8 @@
 	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
 		goto invalid_flags;
 
-	if (kbase_reg_prepare_native(reg, kctx) != 0) {
+	if (kbase_reg_prepare_native(reg, kctx,
+				base_mem_group_id_get(*flags)) != 0) {
 		dev_err(dev, "Failed to prepare region");
 		goto prepare_failed;
 	}
@@ -327,11 +384,7 @@
 
 	/* mmap needed to setup VA? */
 	if (*flags & BASE_MEM_SAME_VA) {
-		unsigned long prot = PROT_NONE;
-		unsigned long va_size = va_pages << PAGE_SHIFT;
-		unsigned long va_map = va_size;
 		unsigned long cookie, cookie_nr;
-		unsigned long cpu_addr;
 
 		/* Bind to a cookie */
 		if (!kctx->cookies) {
@@ -345,38 +398,11 @@
 		BUG_ON(kctx->pending_regions[cookie_nr]);
 		kctx->pending_regions[cookie_nr] = reg;
 
-		kbase_gpu_vm_unlock(kctx);
-
 		/* relocate to correct base */
 		cookie = cookie_nr + PFN_DOWN(BASE_MEM_COOKIE_BASE);
 		cookie <<= PAGE_SHIFT;
 
-		/*
-		 * 10.1-10.4 UKU userland relies on the kernel to call mmap.
-		 * For all other versions we can just return the cookie
-		 */
-		if (kctx->api_version < KBASE_API_VERSION(10, 1) ||
-		    kctx->api_version > KBASE_API_VERSION(10, 4)) {
-			*gpu_va = (u64) cookie;
-			return reg;
-		}
-		if (*flags & BASE_MEM_PROT_CPU_RD)
-			prot |= PROT_READ;
-		if (*flags & BASE_MEM_PROT_CPU_WR)
-			prot |= PROT_WRITE;
-
-		cpu_addr = vm_mmap(kctx->filp, 0, va_map, prot,
-				MAP_SHARED, cookie);
-
-		if (IS_ERR_VALUE(cpu_addr)) {
-			kbase_gpu_vm_lock(kctx);
-			kctx->pending_regions[cookie_nr] = NULL;
-			kctx->cookies |= (1UL << cookie_nr);
-			kbase_gpu_vm_unlock(kctx);
-			goto no_mmap;
-		}
-
-		*gpu_va = (u64) cpu_addr;
+		*gpu_va = (u64) cookie;
 	} else /* we control the VA */ {
 		if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
 			dev_warn(dev, "Failed to map memory on GPU");
@@ -385,10 +411,9 @@
 		}
 		/* return real GPU VA */
 		*gpu_va = reg->start_pfn << PAGE_SHIFT;
-
-		kbase_gpu_vm_unlock(kctx);
 	}
 
+	kbase_gpu_vm_unlock(kctx);
 	return reg;
 
 no_mmap:
@@ -471,8 +496,8 @@
 			 * for compatibility reasons */
 			if (KBASE_REG_PF_GROW & reg->flags)
 				*out |= BASE_MEM_GROW_ON_GPF;
-			if (KBASE_REG_SECURE & reg->flags)
-				*out |= BASE_MEM_SECURE;
+			if (KBASE_REG_PROTECTED & reg->flags)
+				*out |= BASE_MEM_PROTECTED;
 		}
 		if (KBASE_REG_TILER_ALIGN_TOP & reg->flags)
 			*out |= BASE_MEM_TILER_ALIGN_TOP;
@@ -481,6 +506,8 @@
 		if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
 			*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
 
+		*out |= base_mem_group_id_set(reg->cpu_alloc->group_id);
+
 		WARN(*out & ~BASE_MEM_FLAGS_QUERYABLE,
 				"BASE_MEM_FLAGS_QUERYABLE needs updating\n");
 		*out &= BASE_MEM_FLAGS_QUERYABLE;
@@ -643,14 +670,16 @@
 void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc)
 {
 	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	int __maybe_unused new_page_count;
 
 	kbase_process_page_usage_dec(kctx, alloc->nents);
-	new_page_count = kbase_atomic_sub_pages(alloc->nents,
-						&kctx->used_pages);
-	kbase_atomic_sub_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+	new_page_count = atomic_sub_return(alloc->nents,
+		&kctx->used_pages);
+	atomic_sub(alloc->nents, &kctx->kbdev->memdev.used_pages);
 
 	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
 			kctx->id,
 			(u64)new_page_count);
 }
@@ -663,11 +692,12 @@
 void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc)
 {
 	struct kbase_context *kctx = alloc->imported.native.kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	int __maybe_unused new_page_count;
 
-	new_page_count = kbase_atomic_add_pages(alloc->nents,
-						&kctx->used_pages);
-	kbase_atomic_add_pages(alloc->nents, &kctx->kbdev->memdev.used_pages);
+	new_page_count = atomic_add_return(alloc->nents,
+		&kctx->used_pages);
+	atomic_add(alloc->nents, &kctx->kbdev->memdev.used_pages);
 
 	/* Increase mm counters so that the allocation is accounted for
 	 * against the process and thus is visible to the OOM killer,
@@ -675,6 +705,7 @@
 	kbase_process_page_usage_inc(kctx, alloc->nents);
 
 	KBASE_TLSTREAM_AUX_PAGESALLOC(
+			kbdev,
 			kctx->id,
 			(u64)new_page_count);
 }
@@ -757,7 +788,7 @@
 	struct kbase_va_region *reg;
 	int ret = -EINVAL;
 	unsigned int real_flags = 0;
-	unsigned int prev_flags = 0;
+	unsigned int new_flags = 0;
 	bool prev_needed, new_needed;
 
 	KBASE_DEBUG_ASSERT(kctx);
@@ -832,25 +863,55 @@
 		goto out_unlock;
 	}
 
-	/* save for roll back */
-	prev_flags = reg->flags;
-	reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
-	reg->flags |= real_flags;
+	new_flags = reg->flags & ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+	new_flags |= real_flags;
 
 	/* Currently supporting only imported memory */
-#ifdef CONFIG_DMA_SHARED_BUFFER
-	if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
-		/* Future use will use the new flags, existing mapping will NOT be updated
-		 * as memory should not be in use by the GPU when updating the flags.
-		 */
-		ret = 0;
-		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM) {
+		ret = -EINVAL;
+		goto out_unlock;
 	}
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 
-	/* roll back on error */
-	if (ret)
-		reg->flags = prev_flags;
+	if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
+		/* Future use will use the new flags, existing mapping
+		 * will NOT be updated as memory should not be in use
+		 * by the GPU when updating the flags.
+		 */
+		WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+		ret = 0;
+	} else if (reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+		/*
+		 * When CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND is not enabled the
+		 * dma-buf GPU mapping should always be present, check that
+		 * this is the case and warn and skip the page table update if
+		 * not.
+		 *
+		 * Then update dma-buf GPU mapping with the new flags.
+		 *
+		 * Note: The buffer must not be in use on the GPU when
+		 * changing flags. If the buffer is in active use on
+		 * the GPU, there is a risk that the GPU may trigger a
+		 * shareability fault, as it will see the same
+		 * addresses from buffer with different shareability
+		 * properties.
+		 */
+		dev_dbg(kctx->kbdev->dev,
+			"Updating page tables on mem flag change\n");
+		ret = kbase_mmu_update_pages(kctx, reg->start_pfn,
+				kbase_get_gpu_phy_pages(reg),
+				kbase_reg_current_backed_size(reg),
+				new_flags,
+				reg->gpu_alloc->group_id);
+		if (ret)
+			dev_warn(kctx->kbdev->dev,
+				 "Failed to update GPU page tables on flag change: %d\n",
+				 ret);
+	} else
+		WARN_ON(!reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+
+	/* If everything is good, then set the new flags on the region. */
+	if (!ret)
+		reg->flags = new_flags;
 
 out_unlock:
 	kbase_gpu_vm_unlock(kctx);
@@ -861,7 +922,335 @@
 
 #define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
+int kbase_mem_do_sync_imported(struct kbase_context *kctx,
+		struct kbase_va_region *reg, enum kbase_sync_type sync_fn)
+{
+	int ret = -EINVAL;
+	struct dma_buf *dma_buf;
+	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	/* We assume that the same physical allocation object is used for both
+	 * GPU and CPU for imported buffers.
+	 */
+	WARN_ON(reg->cpu_alloc != reg->gpu_alloc);
+
+	/* Currently only handle dma-bufs */
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)
+		return ret;
+	/*
+	 * Attempting to sync with CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND
+	 * enabled can expose us to a Linux Kernel issue between v4.6 and
+	 * v4.19. We will not attempt to support cache syncs on dma-bufs that
+ * are mapped on demand (i.e. not on import), whether on pre-4.6 or on 4.20
+ * and newer kernels, because this makes it difficult for userspace to know
+ * when it can rely on the cache sync.
+	 * Instead, only support syncing when we always map dma-bufs on import,
+	 * or if the particular buffer is mapped right now.
+	 */
+	if (IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND) &&
+	    !reg->gpu_alloc->imported.umm.current_mapping_usage_count)
+		return ret;
+
+	dma_buf = reg->gpu_alloc->imported.umm.dma_buf;
+
+	switch (sync_fn) {
+	case KBASE_SYNC_TO_DEVICE:
+		dev_dbg(kctx->kbdev->dev,
+			"Syncing imported buffer at GPU VA %llx to GPU\n",
+			reg->start_pfn);
+#ifdef KBASE_MEM_ION_SYNC_WORKAROUND
+		if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) {
+			struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment;
+			struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt;
+
+			dma_sync_sg_for_device(attachment->dev, sgt->sgl,
+					sgt->nents, dir);
+			ret = 0;
+		}
+#else
+		/* Though the version check below could be superfluous depending on
+		 * the condition used to enable KBASE_MEM_ION_SYNC_WORKAROUND, we
+		 * keep it here to ease modification for non-ION systems or systems
+		 * where ION has been patched.
+		 */
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
+		dma_buf_end_cpu_access(dma_buf,
+				0, dma_buf->size,
+				dir);
+		ret = 0;
+#else
+		ret = dma_buf_end_cpu_access(dma_buf,
+				dir);
+#endif
+#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
+		break;
+	case KBASE_SYNC_TO_CPU:
+		dev_dbg(kctx->kbdev->dev,
+			"Syncing imported buffer at GPU VA %llx to CPU\n",
+			reg->start_pfn);
+#ifdef KBASE_MEM_ION_SYNC_WORKAROUND
+		if (!WARN_ON(!reg->gpu_alloc->imported.umm.dma_attachment)) {
+			struct dma_buf_attachment *attachment = reg->gpu_alloc->imported.umm.dma_attachment;
+			struct sg_table *sgt = reg->gpu_alloc->imported.umm.sgt;
+
+			dma_sync_sg_for_cpu(attachment->dev, sgt->sgl,
+					sgt->nents, dir);
+			ret = 0;
+		}
+#else
+		ret = dma_buf_begin_cpu_access(dma_buf,
+#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE && !defined(CONFIG_CHROMEOS)
+				0, dma_buf->size,
+#endif
+				dir);
+#endif /* KBASE_MEM_ION_SYNC_WORKAROUND */
+		break;
+	}
+
+	if (unlikely(ret))
+		dev_warn(kctx->kbdev->dev,
+			 "Failed to sync mem region %pK at GPU VA %llx: %d\n",
+			 reg, reg->start_pfn, ret);
+
+	return ret;
+}
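For illustration only, not part of the patch: a hypothetical caller bracketing CPU access to an imported dma-buf with the helper above. The function name is invented; kctx->reg_lock is assumed to be held, as the helper asserts.

/* Hypothetical sketch: sync an imported dma-buf around CPU access. */
static void example_cpu_access_imported(struct kbase_context *kctx,
		struct kbase_va_region *reg)
{
	/* make the CPU view coherent before the CPU touches the buffer */
	if (kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_CPU))
		return;

	/* ... CPU reads/writes of the imported buffer ... */

	/* hand the buffer back to the GPU */
	kbase_mem_do_sync_imported(kctx, reg, KBASE_SYNC_TO_DEVICE);
}
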
+
+/**
+ * kbase_mem_umm_unmap_attachment - Unmap dma-buf attachment
+ * @kctx: Pointer to kbase context
+ * @alloc: Pointer to allocation with imported dma-buf memory to unmap
+ *
+ * This will unmap a dma-buf. Must be called after the GPU page tables for the
+ * region have been torn down.
+ */
+static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx,
+					   struct kbase_mem_phy_alloc *alloc)
+{
+	struct tagged_addr *pa = alloc->pages;
+
+	dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+				 alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+	alloc->imported.umm.sgt = NULL;
+
+	memset(pa, 0xff, sizeof(*pa) * alloc->nents);
+	alloc->nents = 0;
+}
+
+/**
+ * kbase_mem_umm_map_attachment - Prepare attached dma-buf for GPU mapping
+ * @kctx: Pointer to kbase context
+ * @reg: Pointer to region with imported dma-buf memory to map
+ *
+ * Map the dma-buf and prepare the page array with the tagged Mali physical
+ * addresses for GPU mapping.
+ *
+ * Return: 0 on success, or negative error code
+ */
+static int kbase_mem_umm_map_attachment(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	struct sg_table *sgt;
+	struct scatterlist *s;
+	int i;
+	struct tagged_addr *pa;
+	int err;
+	size_t count = 0;
+	struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+
+	WARN_ON_ONCE(alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM);
+	WARN_ON_ONCE(alloc->imported.umm.sgt);
+
+	sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment,
+			DMA_BIDIRECTIONAL);
+	if (IS_ERR_OR_NULL(sgt))
+		return -EINVAL;
+
+	/* save for later */
+	alloc->imported.umm.sgt = sgt;
+
+	pa = kbase_get_gpu_phy_pages(reg);
+
+	for_each_sg(sgt->sgl, s, sgt->nents, i) {
+		size_t j, pages = PFN_UP(sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+		"sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+		sg_dma_len(s));
+
+		WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+		"sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+		(unsigned long long) sg_dma_address(s));
+
+		for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++)
+			*pa++ = as_tagged(sg_dma_address(s) +
+				(j << PAGE_SHIFT));
+		WARN_ONCE(j < pages,
+		"sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+		alloc->imported.umm.dma_buf->size);
+	}
+
+	if (!(reg->flags & KBASE_REG_IMPORT_PAD) &&
+			WARN_ONCE(count < reg->nr_pages,
+			"sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+			alloc->imported.umm.dma_buf->size)) {
+		err = -EINVAL;
+		goto err_unmap_attachment;
+	}
+
+	/* Update nents as we now have pages to map */
+	alloc->nents = count;
+
+	return 0;
+
+err_unmap_attachment:
+	kbase_mem_umm_unmap_attachment(kctx, alloc);
+
+	return err;
+}
+
+int kbase_mem_umm_map(struct kbase_context *kctx,
+		struct kbase_va_region *reg)
+{
+	int err;
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long gwt_mask = ~0;
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	alloc = reg->gpu_alloc;
+
+	alloc->imported.umm.current_mapping_usage_count++;
+	if (alloc->imported.umm.current_mapping_usage_count != 1) {
+		if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT)) {
+			if (!kbase_is_region_invalid_or_free(reg)) {
+				err = kbase_mem_do_sync_imported(kctx, reg,
+						KBASE_SYNC_TO_DEVICE);
+				WARN_ON_ONCE(err);
+			}
+		}
+		return 0;
+	}
+
+	err = kbase_mem_umm_map_attachment(kctx, reg);
+	if (err)
+		goto bad_map_attachment;
+
+#ifdef CONFIG_MALI_CINSTR_GWT
+	if (kctx->gwt_enabled)
+		gwt_mask = ~KBASE_REG_GPU_WR;
+#endif
+
+	err = kbase_mmu_insert_pages(kctx->kbdev,
+				     &kctx->mmu,
+				     reg->start_pfn,
+				     kbase_get_gpu_phy_pages(reg),
+				     kbase_reg_current_backed_size(reg),
+				     reg->flags & gwt_mask,
+				     kctx->as_nr,
+				     alloc->group_id);
+	if (err)
+		goto bad_insert;
+
+	if (reg->flags & KBASE_REG_IMPORT_PAD &&
+			!WARN_ON(reg->nr_pages < alloc->nents)) {
+		/* For padded imported dma-buf memory, map the dummy aliasing
+		 * page from the end of the dma-buf pages, to the end of the
+		 * region using a read only mapping.
+		 *
+		 * Assume alloc->nents is the number of actual pages in the
+		 * dma-buf memory.
+		 */
+		err = kbase_mmu_insert_single_page(kctx,
+				reg->start_pfn + alloc->nents,
+				kctx->aliasing_sink_page,
+				reg->nr_pages - alloc->nents,
+				(reg->flags | KBASE_REG_GPU_RD) &
+				~KBASE_REG_GPU_WR,
+				KBASE_MEM_GROUP_SINK);
+		if (err)
+			goto bad_pad_insert;
+	}
+
+	return 0;
+
+bad_pad_insert:
+	kbase_mmu_teardown_pages(kctx->kbdev,
+				 &kctx->mmu,
+				 reg->start_pfn,
+				 alloc->nents,
+				 kctx->as_nr);
+bad_insert:
+	kbase_mem_umm_unmap_attachment(kctx, alloc);
+bad_map_attachment:
+	alloc->imported.umm.current_mapping_usage_count--;
+
+	return err;
+}
+
+void kbase_mem_umm_unmap(struct kbase_context *kctx,
+		struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+{
+	alloc->imported.umm.current_mapping_usage_count--;
+	if (alloc->imported.umm.current_mapping_usage_count) {
+		if (IS_ENABLED(CONFIG_MALI_DMA_BUF_LEGACY_COMPAT)) {
+			if (!kbase_is_region_invalid_or_free(reg)) {
+				int err = kbase_mem_do_sync_imported(kctx, reg,
+						KBASE_SYNC_TO_CPU);
+				WARN_ON_ONCE(err);
+			}
+		}
+		return;
+	}
+
+	if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) {
+		int err;
+
+		err = kbase_mmu_teardown_pages(kctx->kbdev,
+					       &kctx->mmu,
+					       reg->start_pfn,
+					       reg->nr_pages,
+					       kctx->as_nr);
+		WARN_ON(err);
+	}
+
+	kbase_mem_umm_unmap_attachment(kctx, alloc);
+}
+
+static int get_umm_memory_group_id(struct kbase_context *kctx,
+		struct dma_buf *dma_buf)
+{
+	int group_id = BASE_MEM_GROUP_DEFAULT;
+
+	if (kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id) {
+		struct memory_group_manager_import_data mgm_import_data;
+
+		mgm_import_data.type =
+			MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF;
+		mgm_import_data.u.dma_buf = dma_buf;
+
+		group_id = kctx->kbdev->mgm_dev->ops.mgm_get_import_memory_id(
+			kctx->kbdev->mgm_dev, &mgm_import_data);
+	}
+
+	return group_id;
+}
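For illustration only, not part of the patch: a hypothetical memory group manager import hook of the shape queried above. The policy shown (group 1 for imports of 2 MiB or more) is invented purely to illustrate the callback; the structure and field names follow the usage in get_umm_memory_group_id().

/* Hypothetical sketch of an mgm_get_import_memory_id() implementation. */
static int example_mgm_get_import_memory_id(
		struct memory_group_manager_device *mgm_dev,
		struct memory_group_manager_import_data *import_data)
{
	if (import_data->type == MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF &&
	    import_data->u.dma_buf->size >= (2UL << 20))
		return 1; /* invented policy: large imports go to group 1 */

	return 0; /* otherwise the default group */
}
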
+
+/**
+ * kbase_mem_from_umm - Import dma-buf memory into kctx
+ * @kctx: Pointer to kbase context to import memory into
+ * @fd: File descriptor of dma-buf to import
+ * @va_pages: Pointer where virtual size of the region will be output
+ * @flags: Pointer to memory flags
+ * @padding: Number of read only padding pages to be inserted at the end of the
+ * GPU mapping of the dma-buf
+ *
+ * Return: Pointer to new kbase_va_region object of the imported dma-buf, or
+ * NULL on error.
+ *
+ * This function imports a dma-buf into kctx, and creates a kbase_va_region
+ * object that wraps the dma-buf.
+ */
 static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
 		int fd, u64 *va_pages, u64 *flags, u32 padding)
 {
@@ -869,26 +1258,43 @@
 	struct dma_buf *dma_buf;
 	struct dma_buf_attachment *dma_attachment;
 	bool shared_zone = false;
+	int group_id;
+
+	/* 64-bit address range is the max */
+	if (*va_pages > (U64_MAX / PAGE_SIZE))
+		return NULL;
 
 	dma_buf = dma_buf_get(fd);
 	if (IS_ERR_OR_NULL(dma_buf))
-		goto no_buf;
+		return NULL;
 
 	dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
-	if (!dma_attachment)
-		goto no_attachment;
+	if (IS_ERR_OR_NULL(dma_attachment)) {
+		dma_buf_put(dma_buf);
+		return NULL;
+	}
 
 	*va_pages = (PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT) + padding;
-	if (!*va_pages)
-		goto bad_size;
-
-	if (*va_pages > (U64_MAX / PAGE_SIZE))
-		/* 64-bit address range is the max */
-		goto bad_size;
+	if (!*va_pages) {
+		dma_buf_detach(dma_buf, dma_attachment);
+		dma_buf_put(dma_buf);
+		return NULL;
+	}
 
 	/* ignore SAME_VA */
 	*flags &= ~BASE_MEM_SAME_VA;
 
+	/*
+	 * Force CPU cached flag.
+	 *
+	 * We can't query the dma-buf exporter to get details about the CPU
+	 * cache attributes of CPU mappings, so we have to assume that the
+	 * buffer may be cached, and call into the exporter for cache
+	 * maintenance, and rely on the exporter to do the right thing when
+	 * handling our calls.
+	 */
+	*flags |= BASE_MEM_CACHED_CPU;
+
 	if (*flags & BASE_MEM_IMPORT_SHARED)
 		shared_zone = true;
 
@@ -911,28 +1317,33 @@
 				0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
 	}
 
-	if (!reg)
-		goto no_region;
+	if (!reg) {
+		dma_buf_detach(dma_buf, dma_attachment);
+		dma_buf_put(dma_buf);
+		return NULL;
+	}
+
+	group_id = get_umm_memory_group_id(kctx, dma_buf);
 
 	reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
-			KBASE_MEM_TYPE_IMPORTED_UMM);
+			KBASE_MEM_TYPE_IMPORTED_UMM, group_id);
 	if (IS_ERR_OR_NULL(reg->gpu_alloc))
-		goto no_alloc_obj;
+		goto no_alloc;
 
 	reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
 
+	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
+		goto error_out;
+
 	/* No pages to map yet */
 	reg->gpu_alloc->nents = 0;
 
-	if (kbase_update_region_flags(kctx, reg, *flags) != 0)
-		goto invalid_flags;
-
 	reg->flags &= ~KBASE_REG_FREE;
 	reg->flags |= KBASE_REG_GPU_NX;	/* UMM is always No eXecute */
 	reg->flags &= ~KBASE_REG_GROWABLE;	/* UMM cannot be grown */
 
-	if (*flags & BASE_MEM_SECURE)
-		reg->flags |= KBASE_REG_SECURE;
+	if (*flags & BASE_MEM_PROTECTED)
+		reg->flags |= KBASE_REG_PROTECTED;
 
 	if (padding)
 		reg->flags |= KBASE_REG_IMPORT_PAD;
@@ -944,28 +1355,38 @@
 	reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
 	reg->extent = 0;
 
+	if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
+		int err;
+
+		reg->gpu_alloc->imported.umm.current_mapping_usage_count = 1;
+
+		err = kbase_mem_umm_map_attachment(kctx, reg);
+		if (err) {
+			dev_warn(kctx->kbdev->dev,
+				 "Failed to map dma-buf %pK on GPU: %d\n",
+				 dma_buf, err);
+			goto error_out;
+		}
+
+		*flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+	}
+
 	return reg;
 
-invalid_flags:
+error_out:
 	kbase_mem_phy_alloc_put(reg->gpu_alloc);
 	kbase_mem_phy_alloc_put(reg->cpu_alloc);
-no_alloc_obj:
+no_alloc:
 	kfree(reg);
-no_region:
-bad_size:
-	dma_buf_detach(dma_buf, dma_attachment);
-no_attachment:
-	dma_buf_put(dma_buf);
-no_buf:
+
 	return NULL;
 }
-#endif  /* CONFIG_DMA_SHARED_BUFFER */
 
-static u32 kbase_get_cache_line_alignment(struct kbase_context *kctx)
+u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev)
 {
 	u32 cpu_cache_line_size = cache_line_size();
 	u32 gpu_cache_line_size =
-		(1UL << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
+		(1UL << kbdev->gpu_props.props.l2_props.log2_line_size);
 
 	return ((cpu_cache_line_size > gpu_cache_line_size) ?
 				cpu_cache_line_size :
@@ -982,7 +1403,7 @@
 	long faulted_pages;
 	int zone = KBASE_REG_ZONE_CUSTOM_VA;
 	bool shared_zone = false;
-	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx);
+	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
 	struct kbase_alloc_import_user_buf *user_buf;
 	struct page **pages = NULL;
 
@@ -1045,8 +1466,9 @@
 	if (!reg)
 		goto no_region;
 
-	reg->gpu_alloc = kbase_alloc_create(kctx, *va_pages,
-			KBASE_MEM_TYPE_IMPORTED_USER_BUF);
+	reg->gpu_alloc = kbase_alloc_create(
+		kctx, *va_pages, KBASE_MEM_TYPE_IMPORTED_USER_BUF,
+		BASE_MEM_GROUP_DEFAULT);
 	if (IS_ERR_OR_NULL(reg->gpu_alloc))
 		goto no_alloc_obj;
 
@@ -1070,8 +1492,11 @@
 #else
 	mmgrab(current->mm);
 #endif
-	user_buf->pages = kmalloc_array(*va_pages, sizeof(struct page *),
-			GFP_KERNEL);
+	if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+		user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
+	else
+		user_buf->pages = kmalloc_array(*va_pages,
+				sizeof(struct page *), GFP_KERNEL);
 
 	if (!user_buf->pages)
 		goto no_page_array;
@@ -1092,7 +1517,13 @@
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0)
 	faulted_pages = get_user_pages(current, current->mm, address, *va_pages,
+#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
+KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
+			reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+			pages, NULL);
+#else
 			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
+#endif
 #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
 	faulted_pages = get_user_pages(address, *va_pages,
 			reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL);
@@ -1183,7 +1614,7 @@
 	/* mask to only allowed flags */
 	*flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
 		   BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
-		   BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+		   BASE_MEM_PROT_CPU_RD | BASE_MEM_COHERENT_SYSTEM_REQUIRED);
 
 	if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
 		dev_warn(kctx->kbdev->dev,
@@ -1228,7 +1659,8 @@
 		goto no_reg;
 
 	/* zero-sized page array, as we don't need one/can support one */
-	reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS);
+	reg->gpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_ALIAS,
+		BASE_MEM_GROUP_DEFAULT);
 	if (IS_ERR_OR_NULL(reg->gpu_alloc))
 		goto no_alloc_obj;
 
@@ -1379,6 +1811,11 @@
 		goto bad_flags;
 	}
 
+	if ((*flags & BASE_MEM_UNCACHED_GPU) != 0 &&
+			(*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0) {
+		/* Remove COHERENT_SYSTEM_REQUIRED flag if uncached GPU mapping is requested */
+		*flags &= ~BASE_MEM_COHERENT_SYSTEM_REQUIRED;
+	}
 	if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
 			!kbase_device_is_cpu_coherent(kctx->kbdev)) {
 		dev_warn(kctx->kbdev->dev,
@@ -1398,7 +1835,6 @@
 	}
 
 	switch (type) {
-#ifdef CONFIG_DMA_SHARED_BUFFER
 	case BASE_MEM_IMPORT_TYPE_UMM: {
 		int fd;
 
@@ -1409,7 +1845,6 @@
 					padding);
 	}
 	break;
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 	case BASE_MEM_IMPORT_TYPE_USER_BUFFER: {
 		struct base_mem_import_user_buffer user_buffer;
 		void __user *uptr;
@@ -1504,8 +1939,9 @@
 
 	/* Map the new pages into the GPU */
 	phy_pages = kbase_get_gpu_phy_pages(reg);
-	ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
-			phy_pages + old_pages, delta, reg->flags, kctx->as_nr);
+	ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
+		reg->start_pfn + old_pages, phy_pages + old_pages, delta,
+		reg->flags, kctx->as_nr, reg->gpu_alloc->group_id);
 
 	return ret;
 }
@@ -1582,6 +2018,12 @@
 	if (reg->flags & KBASE_REG_DONT_NEED)
 		goto out_unlock;
 
+#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
+	/* Reject resizing commit size */
+	if (reg->flags & KBASE_REG_PF_GROW)
+		new_pages = reg->nr_pages;
+#endif
+
 	if (new_pages == reg->gpu_alloc->nents) {
 		/* no change */
 		res = 0;
@@ -1706,58 +2148,109 @@
 
 KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
 
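+/**
+ * get_aliased_alloc - Find the aliased sub-allocation backing a map offset
+ * @vma: The VMA being mapped (unused here)
+ * @reg: The alias region being mapped
+ * @start_off: In/out: page offset into the alias region; on success it is
+ *             rebased to an offset within the returned sub-allocation
+ * @nr_pages: Number of pages that must be backed starting at @start_off
+ *
+ * Return: The aliased sub-allocation backing the requested range, or NULL if
+ *         the range falls outside the region, lands on an entry with no
+ *         backing allocation, or is not fully backed by physical pages.
+ */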
+static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma,
+					struct kbase_va_region *reg,
+					pgoff_t *start_off,
+					size_t nr_pages)
+{
+	struct kbase_aliased *aliased =
+		reg->cpu_alloc->imported.alias.aliased;
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0))
-static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+	if (!reg->cpu_alloc->imported.alias.stride ||
+			reg->nr_pages < (*start_off + nr_pages)) {
+		return NULL;
+	}
+
+	while (*start_off >= reg->cpu_alloc->imported.alias.stride) {
+		aliased++;
+		*start_off -= reg->cpu_alloc->imported.alias.stride;
+	}
+
+	if (!aliased->alloc) {
+		/* sink page not available for dumping map */
+		return NULL;
+	}
+
+	if ((*start_off + nr_pages) > aliased->length) {
+		/* not fully backed by physical pages */
+		return NULL;
+	}
+
+	return aliased;
+}
+
+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
+static vm_fault_t kbase_cpu_vm_fault(struct vm_area_struct *vma,
+			struct vm_fault *vmf)
 {
 #else
-static int kbase_cpu_vm_fault(struct vm_fault *vmf)
+static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 #endif
 	struct kbase_cpu_mapping *map = vma->vm_private_data;
-	pgoff_t rel_pgoff;
+	pgoff_t map_start_pgoff;
+	pgoff_t fault_pgoff;
 	size_t i;
 	pgoff_t addr;
+	size_t nents;
+	struct tagged_addr *pages;
+	vm_fault_t ret = VM_FAULT_SIGBUS;
+	struct memory_group_manager_device *mgm_dev;
 
 	KBASE_DEBUG_ASSERT(map);
 	KBASE_DEBUG_ASSERT(map->count > 0);
 	KBASE_DEBUG_ASSERT(map->kctx);
 	KBASE_DEBUG_ASSERT(map->alloc);
 
-	rel_pgoff = vmf->pgoff - map->region->start_pfn;
+	map_start_pgoff = vma->vm_pgoff - map->region->start_pfn;
 
 	kbase_gpu_vm_lock(map->kctx);
-	if (rel_pgoff >= map->alloc->nents)
-		goto locked_bad_fault;
+	if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) {
+		struct kbase_aliased *aliased =
+		      get_aliased_alloc(vma, map->region, &map_start_pgoff, 1);
+
+		if (!aliased)
+			goto exit;
+
+		nents = aliased->length;
+		pages = aliased->alloc->pages + aliased->offset;
+	} else  {
+		nents = map->alloc->nents;
+		pages = map->alloc->pages;
+	}
+
+	fault_pgoff = map_start_pgoff + (vmf->pgoff - vma->vm_pgoff);
+
+	if (fault_pgoff >= nents)
+		goto exit;
 
 	/* Fault on access to DONT_NEED regions */
 	if (map->alloc->reg && (map->alloc->reg->flags & KBASE_REG_DONT_NEED))
-		goto locked_bad_fault;
+		goto exit;
 
-	/* insert all valid pages from the fault location */
-	i = rel_pgoff;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
-	addr = (pgoff_t)((uintptr_t)vmf->virtual_address >> PAGE_SHIFT);
-#else
-	addr = (pgoff_t)(vmf->address >> PAGE_SHIFT);
-#endif
-	while (i < map->alloc->nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
-		int ret = vm_insert_pfn(vma, addr << PAGE_SHIFT,
-		    PFN_DOWN(as_phys_addr_t(map->alloc->pages[i])));
-		if (ret < 0 && ret != -EBUSY)
-			goto locked_bad_fault;
+	/* Insert all valid pages from the start of the CPU mapping rather
+	 * than from the fault location (the mmap handler previously did the
+	 * same).
+	 */
+	i = map_start_pgoff;
+	addr = (pgoff_t)(vma->vm_start >> PAGE_SHIFT);
+	mgm_dev = map->kctx->kbdev->mgm_dev;
+	while (i < nents && (addr < vma->vm_end >> PAGE_SHIFT)) {
+
+		ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
+			map->alloc->group_id, vma, addr << PAGE_SHIFT,
+			PFN_DOWN(as_phys_addr_t(pages[i])), vma->vm_page_prot);
+
+		if (ret != VM_FAULT_NOPAGE)
+			goto exit;
 
 		i++; addr++;
 	}
 
+exit:
 	kbase_gpu_vm_unlock(map->kctx);
-	/* we resolved it, nothing for VM to do */
-	return VM_FAULT_NOPAGE;
-
-locked_bad_fault:
-	kbase_gpu_vm_unlock(map->kctx);
-	return VM_FAULT_SIGBUS;
+	return ret;
 }
 
 const struct vm_operations_struct kbase_vm_ops = {
@@ -1775,9 +2268,7 @@
 		int free_on_close)
 {
 	struct kbase_cpu_mapping *map;
-	struct tagged_addr *page_array;
 	int err = 0;
-	int i;
 
 	map = kzalloc(sizeof(*map), GFP_KERNEL);
 
@@ -1809,7 +2300,18 @@
 	vma->vm_ops = &kbase_vm_ops;
 	vma->vm_private_data = map;
 
-	page_array = kbase_get_cpu_phy_pages(reg);
+	if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS && nr_pages) {
+		pgoff_t rel_pgoff = vma->vm_pgoff - reg->start_pfn +
+					(aligned_offset >> PAGE_SHIFT);
+		struct kbase_aliased *aliased =
+			get_aliased_alloc(vma, reg, &rel_pgoff, nr_pages);
+
+		if (!aliased) {
+			err = -EINVAL;
+			kfree(map);
+			goto out;
+		}
+	}
 
 	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
 	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
@@ -1823,21 +2325,7 @@
 	}
 
 	if (!kaddr) {
-		unsigned long addr = vma->vm_start + aligned_offset;
-		u64 start_off = vma->vm_pgoff - reg->start_pfn +
-			(aligned_offset>>PAGE_SHIFT);
-
 		vma->vm_flags |= VM_PFNMAP;
-		for (i = 0; i < nr_pages; i++) {
-			phys_addr_t phys;
-
-			phys = as_phys_addr_t(page_array[i + start_off]);
-			err = vm_insert_pfn(vma, addr, PFN_DOWN(phys));
-			if (WARN_ON(err))
-				break;
-
-			addr += PAGE_SIZE;
-		}
 	} else {
 		WARN_ON(aligned_offset);
 		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
@@ -1867,7 +2355,28 @@
 	return err;
 }
 
-static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+#ifdef CONFIG_MALI_VECTOR_DUMP
+static void kbase_free_unused_jit_allocations(struct kbase_context *kctx)
+{
+	/* Free all cached/unused JIT allocations as their contents are not
+	 * really needed for the replay. The GPU writes to them would already
+	 * have been captured through the GWT mechanism.
+	 * This considerably reduces the size of mmu-snapshot-file and it also
+	 * helps avoid segmentation fault issue during vector dumping of
+	 * This considerably reduces the size of the mmu-snapshot-file and also
+	 * helps avoid segmentation fault issues during vector dumping of
+	 * but they got freed by the shrinker under low memory scenarios
+	 * (which do occur with complex contents).
+	 */
+	while (kbase_jit_evict(kctx))
+		;
+}
+#endif
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
+			struct vm_area_struct *vma,
+			struct kbase_va_region **const reg,
+			void **const kmap_addr)
 {
 	struct kbase_va_region *new_reg;
 	void *kaddr;
@@ -1879,6 +2388,10 @@
 	size = (vma->vm_end - vma->vm_start);
 	nr_pages = size >> PAGE_SHIFT;
 
+#ifdef CONFIG_MALI_VECTOR_DUMP
+	kbase_free_unused_jit_allocations(kctx);
+#endif
+
 	kaddr = kbase_mmu_dump(kctx, nr_pages);
 
 	if (!kaddr) {
@@ -1894,7 +2407,8 @@
 		goto out;
 	}
 
-	new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW);
+	new_reg->cpu_alloc = kbase_alloc_create(kctx, 0, KBASE_MEM_TYPE_RAW,
+		BASE_MEM_GROUP_DEFAULT);
 	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
 		err = -ENOMEM;
 		new_reg->cpu_alloc = NULL;
@@ -2008,9 +2522,9 @@
 	return err;
 }
 
-int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+int kbase_context_mmap(struct kbase_context *const kctx,
+	struct vm_area_struct *const vma)
 {
-	struct kbase_context *kctx = file->private_data;
 	struct kbase_va_region *reg = NULL;
 	void *kaddr = NULL;
 	size_t nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
@@ -2102,7 +2616,6 @@
 				goto out_unlock;
 			}
 
-#ifdef CONFIG_DMA_SHARED_BUFFER
 			if (KBASE_MEM_TYPE_IMPORTED_UMM ==
 							reg->cpu_alloc->type) {
 				if (0 != (vma->vm_pgoff - reg->start_pfn)) {
@@ -2116,10 +2629,20 @@
 					vma, vma->vm_pgoff - reg->start_pfn);
 				goto out_unlock;
 			}
-#endif /* CONFIG_DMA_SHARED_BUFFER */
 
-			/* limit what we map to the amount currently backed */
-			if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+			if (reg->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+				/* initial params check for aliased dumping map */
+				if (nr_pages > reg->gpu_alloc->imported.alias.stride ||
+					!reg->gpu_alloc->imported.alias.stride ||
+					!nr_pages) {
+					err = -EINVAL;
+					dev_warn(dev, "mmap aliased: invalid params!\n");
+					goto out_unlock;
+				}
+			} else if (reg->cpu_alloc->nents <
+					(vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+				/* limit what we map to the amount currently backed */
 				if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
 					nr_pages = 0;
 				else
@@ -2150,7 +2673,7 @@
 	return err;
 }
 
-KBASE_EXPORT_TEST_API(kbase_mmap);
+KBASE_EXPORT_TEST_API(kbase_context_mmap);
 
 void kbase_sync_mem_regions(struct kbase_context *kctx,
 		struct kbase_vmap_struct *map, enum kbase_sync_type dest)
@@ -2422,134 +2945,4 @@
 
 	return 0;
 }
-void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
-{
-	int res;
-	void *va;
-	dma_addr_t  dma_pa;
-	struct kbase_va_region *reg;
-	struct tagged_addr *page_array;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	unsigned long attrs = DMA_ATTR_WRITE_COMBINE;
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
-	DEFINE_DMA_ATTRS(attrs);
-#endif
-
-	u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
-	u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
-		    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
-	u32 i;
-
-	KBASE_DEBUG_ASSERT(kctx != NULL);
-	KBASE_DEBUG_ASSERT(0 != size);
-	KBASE_DEBUG_ASSERT(0 != pages);
-
-	if (size == 0)
-		goto err;
-
-	/* All the alloc calls return zeroed memory */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
-			     attrs);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
-	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL,
-			     &attrs);
-#else
-	va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
-#endif
-	if (!va)
-		goto err;
-
-	/* Store the state so we can free it later. */
-	handle->cpu_va = va;
-	handle->dma_pa = dma_pa;
-	handle->size   = size;
-
-
-	reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, pages,
-			KBASE_REG_ZONE_SAME_VA);
-	if (!reg)
-		goto no_reg;
-
-	reg->flags &= ~KBASE_REG_FREE;
-	if (kbase_update_region_flags(kctx, reg, flags) != 0)
-		goto invalid_flags;
-
-	reg->cpu_alloc = kbase_alloc_create(kctx, pages, KBASE_MEM_TYPE_RAW);
-	if (IS_ERR_OR_NULL(reg->cpu_alloc))
-		goto no_alloc;
-
-	reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
-
-	page_array = kbase_get_cpu_phy_pages(reg);
-
-	for (i = 0; i < pages; i++)
-		page_array[i] = as_tagged(dma_pa + ((dma_addr_t)i << PAGE_SHIFT));
-
-	reg->cpu_alloc->nents = pages;
-
-	kbase_gpu_vm_lock(kctx);
-	res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
-	kbase_gpu_vm_unlock(kctx);
-	if (res)
-		goto no_mmap;
-
-	return va;
-
-no_mmap:
-	kbase_mem_phy_alloc_put(reg->cpu_alloc);
-	kbase_mem_phy_alloc_put(reg->gpu_alloc);
-no_alloc:
-invalid_flags:
-	kfree(reg);
-no_reg:
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, attrs);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
-	dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
-#else
-	dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
-#endif
-err:
-	return NULL;
-}
-KBASE_EXPORT_SYMBOL(kbase_va_alloc);
-
-void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
-{
-	struct kbase_va_region *reg;
-	int err;
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) && \
-	(LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
-	DEFINE_DMA_ATTRS(attrs);
-#endif
-
-	KBASE_DEBUG_ASSERT(kctx != NULL);
-	KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
-
-	kbase_gpu_vm_lock(kctx);
-	reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
-	KBASE_DEBUG_ASSERT(reg);
-	err = kbase_gpu_munmap(kctx, reg);
-	kbase_gpu_vm_unlock(kctx);
-	KBASE_DEBUG_ASSERT(!err);
-
-	kbase_mem_phy_alloc_put(reg->cpu_alloc);
-	kbase_mem_phy_alloc_put(reg->gpu_alloc);
-	kfree(reg);
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0))
-	dma_free_attrs(kctx->kbdev->dev, handle->size,
-		       handle->cpu_va, handle->dma_pa, DMA_ATTR_WRITE_COMBINE);
-#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
-	dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
-	dma_free_attrs(kctx->kbdev->dev, handle->size,
-			handle->cpu_va, handle->dma_pa, &attrs);
-#else
-	dma_free_writecombine(kctx->kbdev->dev, handle->size,
-				handle->cpu_va, handle->dma_pa);
-#endif
-}
-KBASE_EXPORT_SYMBOL(kbase_va_free);
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
index a8a52a7..02f1c3b 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010, 2012-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -129,15 +129,15 @@
 int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages);
 
 /**
- * kbase_mmap - Mmap method, gets invoked when mmap system call is issued on
- *              device file /dev/malixx.
- * @file: Pointer to the device file /dev/malixx instance.
+ * kbase_context_mmap - Memory map method, gets invoked when mmap system call is
+ *                      issued on device file /dev/malixx.
+ * @kctx: The kernel context
  * @vma:  Pointer to the struct containing the info where the GPU allocation
  *        will be mapped in virtual address space of CPU.
  *
  * Return: 0 on success or error code
  */
-int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+int kbase_context_mmap(struct kbase_context *kctx, struct vm_area_struct *vma);
 
 /**
  * kbase_mem_evictable_init - Initialize the Ephemeral memory eviction
@@ -303,22 +303,6 @@
  */
 void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
 
-/** @brief Allocate memory from kernel space and map it onto the GPU
- *
- * @param kctx   The context used for the allocation/mapping
- * @param size   The size of the allocation in bytes
- * @param handle An opaque structure used to contain the state needed to free the memory
- * @return the VA for kernel space and GPU MMU
- */
-void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
-
-/** @brief Free/unmap memory allocated by kbase_va_alloc
- *
- * @param kctx   The context used for the allocation/mapping
- * @param handle An opaque structure returned by the kbase_va_alloc function.
- */
-void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
-
 extern const struct vm_operations_struct kbase_vm_ops;
 
 /**
@@ -444,4 +428,42 @@
 void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
 		struct kbase_vmap_struct *kern_mapping);
 
+/**
+ * kbase_get_cache_line_alignment - Return cache line alignment
+ * @kbdev: Device pointer.
+ *
+ * Helper function to return the maximum cache line alignment considering
+ * both CPU and GPU cache sizes.
+ *
+ * Return: CPU and GPU cache line alignment, in bytes.
+ */
+u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev);
+
+#if (KERNEL_VERSION(4, 20, 0) > LINUX_VERSION_CODE)
+static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long pfn, pgprot_t pgprot)
+{
+	int err;
+
+#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
+		((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
+		 (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
+	if (pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
+		return VM_FAULT_SIGBUS;
+
+	err = vm_insert_pfn(vma, addr, pfn);
+#else
+	err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
+#endif
+
+	if (unlikely(err == -ENOMEM))
+		return VM_FAULT_OOM;
+	if (unlikely(err < 0 && err != -EBUSY))
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+#endif
+
 #endif				/* _KBASE_MEM_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
index 0f91be1..0723e32 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -155,7 +155,8 @@
 {
 	struct page *p;
 	gfp_t gfp;
-	struct device *dev = pool->kbdev->dev;
+	struct kbase_device *const kbdev = pool->kbdev;
+	struct device *const dev = kbdev->dev;
 	dma_addr_t dma_addr;
 	int i;
 
@@ -167,18 +168,21 @@
 	gfp = GFP_HIGHUSER | __GFP_ZERO;
 #endif
 
-	/* don't warn on higer order failures */
+	/* don't warn on higher order failures */
 	if (pool->order)
 		gfp |= __GFP_NOWARN;
 
-	p = alloc_pages(gfp, pool->order);
+	p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev,
+		pool->group_id, gfp, pool->order);
 	if (!p)
 		return NULL;
 
 	dma_addr = dma_map_page(dev, p, 0, (PAGE_SIZE << pool->order),
 				DMA_BIDIRECTIONAL);
+
 	if (dma_mapping_error(dev, dma_addr)) {
-		__free_pages(p, pool->order);
+		kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev,
+			pool->group_id, p, pool->order);
 		return NULL;
 	}
 
@@ -192,7 +196,8 @@
 static void kbase_mem_pool_free_page(struct kbase_mem_pool *pool,
 		struct page *p)
 {
-	struct device *dev = pool->kbdev->dev;
+	struct kbase_device *const kbdev = pool->kbdev;
+	struct device *const dev = kbdev->dev;
 	dma_addr_t dma_addr = kbase_dma_addr(p);
 	int i;
 
@@ -200,7 +205,9 @@
 		       DMA_BIDIRECTIONAL);
 	for (i = 0; i < (1u << pool->order); i++)
 		kbase_clear_dma_addr(p+i);
-	__free_pages(p, pool->order);
+
+	kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev,
+		pool->group_id, p, pool->order);
 
 	pool_dbg(pool, "freed page to kernel\n");
 }
@@ -369,14 +376,21 @@
 #endif
 
 int kbase_mem_pool_init(struct kbase_mem_pool *pool,
-		size_t max_size,
-		size_t order,
+		const struct kbase_mem_pool_config *config,
+		unsigned int order,
+		int group_id,
 		struct kbase_device *kbdev,
 		struct kbase_mem_pool *next_pool)
 {
+	if (WARN_ON(group_id < 0) ||
+		WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		return -EINVAL;
+	}
+
 	pool->cur_size = 0;
-	pool->max_size = max_size;
+	pool->max_size = kbase_mem_pool_config_get_max_size(config);
 	pool->order = order;
+	pool->group_id = group_id;
 	pool->kbdev = kbdev;
 	pool->next_pool = next_pool;
 	pool->dying = false;
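For illustration only, not part of the patch: a hypothetical set-up of a small-page pool for memory group 0 using the config-based API shown above. The max-size value is arbitrary, and kbase_mem_pool_config_set_max_size() is assumed to be the setter counterpart of the getter used in this hunk (it is exercised by the debugfs code later in this patch).

/* Hypothetical sketch: initialise one memory pool from a config. */
static int example_pool_setup(struct kbase_device *kbdev,
		struct kbase_mem_pool *pool)
{
	struct kbase_mem_pool_config config;

	kbase_mem_pool_config_set_max_size(&config, 512); /* cap at 512 pages */

	/* order 0 => small (4 KiB) pages; memory group 0; no next_pool */
	return kbase_mem_pool_init(pool, &config, 0, 0, kbdev, NULL);
}
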
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
index 4b4eeb3..edb9cd4 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,71 +23,161 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-#include <mali_kbase_mem_pool_debugfs.h>
+#include "mali_kbase_mem_pool_debugfs.h"
+#include "mali_kbase_debugfs_helper.h"
 
-#ifdef CONFIG_DEBUG_FS
-
-static int kbase_mem_pool_debugfs_size_get(void *data, u64 *val)
+void kbase_mem_pool_debugfs_trim(void *const array, size_t const index,
+	size_t const value)
 {
-	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+	struct kbase_mem_pool *const mem_pools = array;
 
-	*val = kbase_mem_pool_size(pool);
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
 
-	return 0;
+	kbase_mem_pool_trim(&mem_pools[index], value);
 }
 
-static int kbase_mem_pool_debugfs_size_set(void *data, u64 val)
+void kbase_mem_pool_debugfs_set_max_size(void *const array,
+	size_t const index, size_t const value)
 {
-	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+	struct kbase_mem_pool *const mem_pools = array;
 
-	kbase_mem_pool_trim(pool, val);
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
 
-	return 0;
+	kbase_mem_pool_set_max_size(&mem_pools[index], value);
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_size_fops,
-		kbase_mem_pool_debugfs_size_get,
-		kbase_mem_pool_debugfs_size_set,
-		"%llu\n");
-
-static int kbase_mem_pool_debugfs_max_size_get(void *data, u64 *val)
+size_t kbase_mem_pool_debugfs_size(void *const array, size_t const index)
 {
-	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+	struct kbase_mem_pool *const mem_pools = array;
 
-	*val = kbase_mem_pool_max_size(pool);
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return 0;
 
-	return 0;
+	return kbase_mem_pool_size(&mem_pools[index]);
 }
 
-static int kbase_mem_pool_debugfs_max_size_set(void *data, u64 val)
+size_t kbase_mem_pool_debugfs_max_size(void *const array, size_t const index)
 {
-	struct kbase_mem_pool *pool = (struct kbase_mem_pool *)data;
+	struct kbase_mem_pool *const mem_pools = array;
 
-	kbase_mem_pool_set_max_size(pool, val);
+	if (WARN_ON(!mem_pools) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return 0;
 
-	return 0;
+	return kbase_mem_pool_max_size(&mem_pools[index]);
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(kbase_mem_pool_debugfs_max_size_fops,
-		kbase_mem_pool_debugfs_max_size_get,
-		kbase_mem_pool_debugfs_max_size_set,
-		"%llu\n");
+void kbase_mem_pool_config_debugfs_set_max_size(void *const array,
+	size_t const index, size_t const value)
+{
+	struct kbase_mem_pool_config *const configs = array;
+
+	if (WARN_ON(!configs) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return;
+
+	kbase_mem_pool_config_set_max_size(&configs[index], value);
+}
+
+size_t kbase_mem_pool_config_debugfs_max_size(void *const array,
+	size_t const index)
+{
+	struct kbase_mem_pool_config *const configs = array;
+
+	if (WARN_ON(!configs) ||
+		WARN_ON(index >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+		return 0;
+
+	return kbase_mem_pool_config_get_max_size(&configs[index]);
+}
+
+static int kbase_mem_pool_debugfs_size_show(struct seq_file *sfile, void *data)
+{
+	CSTD_UNUSED(data);
+	return kbase_debugfs_helper_seq_read(sfile,
+		MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_size);
+}
+
+static ssize_t kbase_mem_pool_debugfs_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	int err;
+
+	CSTD_UNUSED(ppos);
+	err = kbase_debugfs_helper_seq_write(file, ubuf, count,
+		MEMORY_GROUP_MANAGER_NR_GROUPS, kbase_mem_pool_debugfs_trim);
+	return err ? err : count;
+}
+
+static int kbase_mem_pool_debugfs_open(struct inode *in, struct file *file)
+{
+	return single_open(file, kbase_mem_pool_debugfs_size_show,
+		in->i_private);
+}
+
+static const struct file_operations kbase_mem_pool_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_mem_pool_debugfs_open,
+	.read = seq_read,
+	.write = kbase_mem_pool_debugfs_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int kbase_mem_pool_debugfs_max_size_show(struct seq_file *sfile,
+	void *data)
+{
+	CSTD_UNUSED(data);
+	return kbase_debugfs_helper_seq_read(sfile,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_max_size);
+}
+
+static ssize_t kbase_mem_pool_debugfs_max_size_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	int err;
+
+	CSTD_UNUSED(ppos);
+	err = kbase_debugfs_helper_seq_write(file, ubuf, count,
+		MEMORY_GROUP_MANAGER_NR_GROUPS,
+		kbase_mem_pool_debugfs_set_max_size);
+	return err ? err : count;
+}
+
+static int kbase_mem_pool_debugfs_max_size_open(struct inode *in,
+	struct file *file)
+{
+	return single_open(file, kbase_mem_pool_debugfs_max_size_show,
+		in->i_private);
+}
+
+static const struct file_operations kbase_mem_pool_debugfs_max_size_fops = {
+	.owner = THIS_MODULE,
+	.open = kbase_mem_pool_debugfs_max_size_open,
+	.read = seq_read,
+	.write = kbase_mem_pool_debugfs_max_size_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
 
 void kbase_mem_pool_debugfs_init(struct dentry *parent,
-		struct kbase_mem_pool *pool,
-		struct kbase_mem_pool *lp_pool)
+		struct kbase_context *kctx)
 {
 	debugfs_create_file("mem_pool_size", S_IRUGO | S_IWUSR, parent,
-			pool, &kbase_mem_pool_debugfs_size_fops);
+		&kctx->mem_pools.small, &kbase_mem_pool_debugfs_fops);
 
 	debugfs_create_file("mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
-			pool, &kbase_mem_pool_debugfs_max_size_fops);
+		&kctx->mem_pools.small, &kbase_mem_pool_debugfs_max_size_fops);
 
 	debugfs_create_file("lp_mem_pool_size", S_IRUGO | S_IWUSR, parent,
-			lp_pool, &kbase_mem_pool_debugfs_size_fops);
+		&kctx->mem_pools.large, &kbase_mem_pool_debugfs_fops);
 
 	debugfs_create_file("lp_mem_pool_max_size", S_IRUGO | S_IWUSR, parent,
-			lp_pool, &kbase_mem_pool_debugfs_max_size_fops);
+		&kctx->mem_pools.large, &kbase_mem_pool_debugfs_max_size_fops);
 }
-
-#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
index 990d91c..2932945 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_debugfs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,26 +20,104 @@
  *
  */
 
-#ifndef _KBASE_MEM_POOL_DEBUGFS_H
-#define _KBASE_MEM_POOL_DEBUGFS_H
+#ifndef _KBASE_MEM_POOL_DEBUGFS_H_
+#define _KBASE_MEM_POOL_DEBUGFS_H_
 
 #include <mali_kbase.h>
 
 /**
  * kbase_mem_pool_debugfs_init - add debugfs knobs for @pool
  * @parent:  Parent debugfs dentry
- * @pool:    Memory pool of small pages to control
- * @lp_pool: Memory pool of large pages to control
+ * @kctx:    The kbase context
  *
  * Adds four debugfs files under @parent:
- * - mem_pool_size: get/set the current size of @pool
- * - mem_pool_max_size: get/set the max size of @pool
- * - lp_mem_pool_size: get/set the current size of @lp_pool
- * - lp_mem_pool_max_size: get/set the max size of @lp_pool
+ * - mem_pool_size: get/set the current sizes of @kctx's small-page pools
+ * - mem_pool_max_size: get/set the max sizes of @kctx's small-page pools
+ * - lp_mem_pool_size: get/set the current sizes of @kctx's large-page pools
+ * - lp_mem_pool_max_size: get/set the max sizes of @kctx's large-page pools
  */
 void kbase_mem_pool_debugfs_init(struct dentry *parent,
-		struct kbase_mem_pool *pool,
-		struct kbase_mem_pool *lp_pool);
+		struct kbase_context *kctx);
 
-#endif  /*_KBASE_MEM_POOL_DEBUGFS_H*/
+/**
+ * kbase_mem_pool_debugfs_trim - Grow or shrink a memory pool to a new size
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @value: New number of pages in the pool.
+ *
+ * If @value > current size, fill the pool with new pages from the kernel, but
+ * not above the max_size for the pool.
+ * If @value < current size, shrink the pool by freeing pages to the kernel.
+ */
+void kbase_mem_pool_debugfs_trim(void *array, size_t index, size_t value);
+
+/**
+ * kbase_mem_pool_debugfs_set_max_size - Set maximum number of free pages in
+ *                                       memory pool
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @value: Maximum number of free pages the pool can hold.
+ *
+ * If the maximum size is reduced, the pool will be shrunk to adhere to the
+ * new limit. For details see kbase_mem_pool_shrink().
+ */
+void kbase_mem_pool_debugfs_set_max_size(void *array, size_t index,
+	size_t value);
+
+/**
+ * kbase_mem_pool_debugfs_size - Get number of free pages in a memory pool
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Note: the size of the pool may in certain corner cases exceed @max_size!
+ *
+ * Return: Number of free pages in the pool
+ */
+size_t kbase_mem_pool_debugfs_size(void *array, size_t index);
+
+/**
+ * kbase_mem_pool_debugfs_max_size - Get maximum number of free pages in a
+ *                                   memory pool
+ *
+ * @array: Address of the first in an array of physical memory pools.
+ * @index: A memory group ID to be used as an index into the array of memory
+ *         pools. Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: Maximum number of free pages in the pool
+ */
+size_t kbase_mem_pool_debugfs_max_size(void *array, size_t index);
+
+/**
+ * kbase_mem_pool_config_debugfs_set_max_size - Set maximum number of free pages
+ *                                              in initial configuration of pool
+ *
+ * @array:  Array of initial configurations for a set of physical memory pools.
+ * @index:  A memory group ID to be used as an index into the array.
+ *          Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @value:  Maximum number of free pages that a memory pool created from the
+ *          selected configuration can hold.
+ */
+void kbase_mem_pool_config_debugfs_set_max_size(void *array, size_t index,
+	size_t value);
+
+/**
+ * kbase_mem_pool_config_debugfs_max_size - Get maximum number of free pages
+ *                                          from initial configuration of pool
+ *
+ * @array:  Array of initial configurations for a set of physical memory pools.
+ * @index:  A memory group ID to be used as an index into the array.
+ *          Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ *
+ * Return: Maximum number of free pages that a memory pool created from the
+ *         selected configuration can hold.
+ */
+size_t kbase_mem_pool_config_debugfs_max_size(void *array, size_t index);
+
+#endif  /*_KBASE_MEM_POOL_DEBUGFS_H_ */
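
The per-pool DEFINE_SIMPLE_ATTRIBUTE handlers are replaced above by generic array accessors that the new kbase_debugfs_helper_* routines call once per memory group. A minimal standalone sketch of that callback pattern follows; the names pool_size and show_all are illustrative only, and NR_GROUPS stands in for MEMORY_GROUP_MANAGER_NR_GROUPS:

#include <stdio.h>
#include <stddef.h>

#define NR_GROUPS 16 /* stand-in for MEMORY_GROUP_MANAGER_NR_GROUPS */

struct pool { size_t cur_size; };

/* Same shape as kbase_mem_pool_debugfs_size(): opaque array + group index. */
static size_t pool_size(void *const array, size_t const index)
{
	struct pool *const pools = array;

	if (!pools || index >= NR_GROUPS)
		return 0;
	return pools[index].cur_size;
}

/* Rough equivalent of what the seq_file helper is expected to do: call the
 * accessor once per group and emit one value per group on a single line.
 */
static void show_all(void *array, size_t (*get)(void *, size_t))
{
	size_t i;

	for (i = 0; i < NR_GROUPS; i++)
		printf("%zu%c", get(array, i), (i + 1 < NR_GROUPS) ? ' ' : '\n');
}

int main(void)
{
	struct pool pools[NR_GROUPS] = { { .cur_size = 42 } };

	show_all(pools, pool_size);
	return 0;
}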
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c
new file mode 100644
index 0000000..aa25548
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.c
@@ -0,0 +1,115 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_pool_group.h>
+
+#include <linux/memory_group_manager.h>
+
+void kbase_mem_pool_group_config_set_max_size(
+	struct kbase_mem_pool_group_config *const configs,
+	size_t const max_size)
+{
+	size_t const large_max_size = max_size >>
+		(KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER -
+		KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
+	int gid;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		kbase_mem_pool_config_set_max_size(&configs->small[gid],
+			max_size);
+
+		kbase_mem_pool_config_set_max_size(&configs->large[gid],
+			large_max_size);
+	}
+}
+
+int kbase_mem_pool_group_init(
+	struct kbase_mem_pool_group *const mem_pools,
+	struct kbase_device *const kbdev,
+	const struct kbase_mem_pool_group_config *const configs,
+	struct kbase_mem_pool_group *next_pools)
+{
+	int gid, err = 0;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		err = kbase_mem_pool_init(&mem_pools->small[gid],
+			&configs->small[gid],
+			KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER,
+			gid,
+			kbdev,
+			next_pools ? &next_pools->small[gid] : NULL);
+
+		if (!err) {
+			err = kbase_mem_pool_init(&mem_pools->large[gid],
+				&configs->large[gid],
+				KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER,
+				gid,
+				kbdev,
+				next_pools ? &next_pools->large[gid] : NULL);
+			if (err)
+				kbase_mem_pool_term(&mem_pools->small[gid]);
+		}
+
+		/* Break out of the loop early to avoid incrementing the count
+		 * of memory pool pairs successfully initialized.
+		 */
+		if (err)
+			break;
+	}
+
+	if (err) {
+		/* gid gives the number of memory pool pairs successfully
+		 * initialized, which is one greater than the array index of
+		 * the last successfully initialized group.
+		 */
+		while (gid-- > 0) {
+			kbase_mem_pool_term(&mem_pools->small[gid]);
+			kbase_mem_pool_term(&mem_pools->large[gid]);
+		}
+	}
+
+	return err;
+}
+
+void kbase_mem_pool_group_mark_dying(
+	struct kbase_mem_pool_group *const mem_pools)
+{
+	int gid;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		kbase_mem_pool_mark_dying(&mem_pools->small[gid]);
+		kbase_mem_pool_mark_dying(&mem_pools->large[gid]);
+	}
+}
+
+void kbase_mem_pool_group_term(
+	struct kbase_mem_pool_group *const mem_pools)
+{
+	int gid;
+
+	for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid) {
+		kbase_mem_pool_term(&mem_pools->small[gid]);
+		kbase_mem_pool_term(&mem_pools->large[gid]);
+	}
+}
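
kbase_mem_pool_group_init() above initializes small/large pool pairs in order and, on the first failure, unwinds only the pairs that were fully initialized. A standalone sketch of that unwind idiom; pair_init, pair_term and the forced failure at group 2 are hypothetical:

#include <stdio.h>

#define NR_GROUPS 4 /* small stand-in for MEMORY_GROUP_MANAGER_NR_GROUPS */

/* Hypothetical per-group init/term; init fails at group 2 to show the path. */
static int pair_init(int gid) { return (gid == 2) ? -1 : 0; }
static void pair_term(int gid) { printf("term pair %d\n", gid); }

int main(void)
{
	int gid, err = 0;

	for (gid = 0; gid < NR_GROUPS; ++gid) {
		err = pair_init(gid);
		if (err)
			break; /* gid == number of pairs fully initialized */
	}

	if (err) {
		while (gid-- > 0)
			pair_term(gid);
	}

	return err ? 1 : 0;
}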
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h
new file mode 100644
index 0000000..0484f59
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_pool_group.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_MEM_POOL_GROUP_H_
+#define _KBASE_MEM_POOL_GROUP_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * kbase_mem_pool_group_config_set_max_size - Set the initial configuration
+ *                                            for a set of memory pools
+ *
+ * This function sets the initial configuration for every memory pool so that
+ * the maximum amount of free memory that each pool can hold is identical.
+ * The equivalent number of 2 MiB pages is calculated automatically for the
+ * purpose of configuring the large page pools.
+ *
+ * @configs:  Initial configuration for the set of memory pools
+ * @max_size: Maximum number of free 4 KiB pages each pool can hold
+ */
+void kbase_mem_pool_group_config_set_max_size(
+	struct kbase_mem_pool_group_config *configs, size_t max_size);
+
+/**
+ * kbase_mem_pool_group_init - Initialize a set of memory pools
+ *
+ * Initializes a complete set of physical memory pools. Memory pools are used to
+ * allow efficient reallocation of previously-freed physical pages. A pair of
+ * memory pools is initialized for each physical memory group: one for 4 KiB
+ * pages and one for 2 MiB pages.
+ *
+ * If @next_pools is not NULL then a request to allocate memory from an
+ * empty pool in @mem_pools will attempt to allocate from the equivalent pool
+ * in @next_pools before going to the memory group manager. Similarly
+ * pages can spill over to the equivalent pool in @next_pools when a pool
+ * is full in @mem_pools. Pages are zeroed before they spill over to another
+ * pool, to prevent leaking information between applications.
+ *
+ * @mem_pools:  Set of memory pools to initialize
+ * @kbdev:      Kbase device where memory is used
+ * @configs:    Initial configuration for the set of memory pools
+ * @next_pools: Set of memory pools from which to allocate memory if there
+ *              is no free memory in one of the @mem_pools
+ *
+ * Return: 0 on success, otherwise a negative error code
+ */
+int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools,
+	struct kbase_device *kbdev,
+	const struct kbase_mem_pool_group_config *configs,
+	struct kbase_mem_pool_group *next_pools);
+
+/**
+ * kbase_mem_pool_group_mark_dying - Mark a set of memory pools as dying
+ *
+ * Marks a complete set of physical memory pools previously initialized by
+ * @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation
+ * operations (e.g. growing on page fault) to be terminated.
+ *
+ * @mem_pools: Set of memory pools to mark
+ */
+void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools);
+
+/**
+ * kbase_mem_pool_group_term - Terminate a set of memory pools
+ *
+ * Terminates a complete set of physical memory pools previously initialized by
+ * @kbase_mem_pool_group_init.
+ *
+ * @mem_pools: Set of memory pools to terminate
+ */
+void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools);
+
+#endif /* _KBASE_MEM_POOL_GROUP_H_ */
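
Putting the interfaces in this header together, a rough sketch of the expected lifecycle is shown below. The wrapper function, the stack-allocated structures and the maximum of 512 pages are assumptions for illustration; only the kbase_mem_pool_group_* calls come from this patch:

/* Sketch only: assumes a valid struct kbase_device *kbdev and that 512 free
 * 4 KiB pages per pool is an acceptable maximum for the platform.
 */
static int example_pools_lifecycle(struct kbase_device *kbdev)
{
	struct kbase_mem_pool_group_config configs;
	struct kbase_mem_pool_group pools;
	int err;

	kbase_mem_pool_group_config_set_max_size(&configs, 512);

	/* No next_pools: an empty pool goes straight to the memory
	 * group manager.
	 */
	err = kbase_mem_pool_group_init(&pools, kbdev, &configs, NULL);
	if (err)
		return err;

	/* ... allocate from and free to the pools ... */

	kbase_mem_pool_group_mark_dying(&pools);
	kbase_mem_pool_group_term(&pools);

	return 0;
}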
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
index d4f8433..5d38ed2 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,6 +58,7 @@
 }
 
 static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+	.owner = THIS_MODULE,
 	.open = kbasep_mem_profile_debugfs_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
@@ -75,7 +76,9 @@
 		kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED));
 
 	if (!kbase_ctx_flag(kctx, KCTX_MEM_PROFILE_INITIALIZED)) {
-		if (!debugfs_create_file("mem_profile", S_IRUGO,
+		if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+			err  = -ENOMEM;
+		} else if (!debugfs_create_file("mem_profile", 0444,
 					kctx->kctx_dentry, kctx,
 					&kbasep_mem_profile_debugfs_fops)) {
 			err = -EAGAIN;
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
index 43b0f6c..81e2886 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -33,7 +33,7 @@
  * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
  */
 #define KBASE_MEM_PROFILE_MAX_BUF_SIZE \
-	((size_t) (64 + ((80 + (56 * 64)) * 34) + 56))
+	((size_t) (64 + ((80 + (56 * 64)) * 50) + 56))
 
 #endif  /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
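
For reference, raising the per-context count in the formula above from 34 to 50 grows the worst-case profile buffer from 124,696 to 183,320 bytes; a quick userspace check of that arithmetic:

#include <stdio.h>

int main(void)
{
	/* Same expression as KBASE_MEM_PROFILE_MAX_BUF_SIZE above. */
	unsigned long old_size = 64 + ((80 + (56 * 64)) * 34) + 56; /* 124696 */
	unsigned long new_size = 64 + ((80 + (56 * 64)) * 50) + 56; /* 183320 */

	printf("old=%lu bytes, new=%lu bytes\n", old_size, new_size);
	return 0;
}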
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h b/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h
new file mode 100644
index 0000000..99475b67
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mipe_gen_header.h
@@ -0,0 +1,120 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_mipe_proto.h"
+
+/**
+ * This header generates the MIPE tracepoint declaration BLOB at
+ * compile time.
+ *
+ * Before including this header, the following parameters
+ * must be defined:
+ *
+ * MIPE_HEADER_BLOB_VAR_NAME: the name of the variable
+ * where the resulting BLOB will be stored.
+ *
+ * MIPE_HEADER_TP_LIST: the list of tracepoints to process.
+ * It should be defined as follows:
+ * #define MIPE_HEADER_TP_LIST \
+ *     TP_DESC(FIRST_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
+ *     TP_DESC(SECOND_TRACEPOINT, "Some description", "@II", "first_arg,second_arg") \
+ *     etc.
+ * Where the first argument is the tracepoint name, the second
+ * argument is a short tracepoint description, the third argument
+ * is the argument types (see the MIPE documentation), and the
+ * fourth argument is a comma-separated list of argument names.
+ *
+ * MIPE_HEADER_TP_LIST_COUNT: number of entries in MIPE_HEADER_TP_LIST.
+ *
+ * MIPE_HEADER_PKT_CLASS: MIPE packet class.
+ */
+
+#if !defined(MIPE_HEADER_BLOB_VAR_NAME)
+#error "MIPE_HEADER_BLOB_VAR_NAME must be defined!"
+#endif
+
+#if !defined(MIPE_HEADER_TP_LIST)
+#error "MIPE_HEADER_TP_LIST must be defined!"
+#endif
+
+#if !defined(MIPE_HEADER_TP_LIST_COUNT)
+#error "MIPE_HEADER_TP_LIST_COUNT must be defined!"
+#endif
+
+#if !defined(MIPE_HEADER_PKT_CLASS)
+#error "MIPE_HEADER_PKT_CLASS must be defined!"
+#endif
+
+static const struct {
+	u32 _mipe_w0;
+	u32 _mipe_w1;
+	u8  _protocol_version;
+	u8  _pointer_size;
+	u32 _tp_count;
+#define TP_DESC(name, desc, arg_types, arg_names)       \
+	struct {                                        \
+		u32  _name;                             \
+		u32  _size_string_name;                 \
+		char _string_name[sizeof(#name)];       \
+		u32  _size_desc;                        \
+		char _desc[sizeof(desc)];               \
+		u32  _size_arg_types;                   \
+		char _arg_types[sizeof(arg_types)];     \
+		u32  _size_arg_names;                   \
+		char _arg_names[sizeof(arg_names)];     \
+	} __attribute__ ((__packed__)) __ ## name;
+
+	MIPE_HEADER_TP_LIST
+#undef TP_DESC
+
+} __attribute__ ((__packed__)) MIPE_HEADER_BLOB_VAR_NAME = {
+	._mipe_w0 = MIPE_PACKET_HEADER_W0(
+		TL_PACKET_FAMILY_TL,
+		MIPE_HEADER_PKT_CLASS,
+		TL_PACKET_TYPE_HEADER,
+		1),
+	._mipe_w1 = MIPE_PACKET_HEADER_W1(
+		sizeof(MIPE_HEADER_BLOB_VAR_NAME) - PACKET_HEADER_SIZE,
+		0),
+	._protocol_version = SWTRACE_VERSION,
+	._pointer_size = sizeof(void *),
+	._tp_count = MIPE_HEADER_TP_LIST_COUNT,
+#define TP_DESC(name, desc, arg_types, arg_names)       \
+	.__ ## name = {                                 \
+		._name = name,                          \
+		._size_string_name = sizeof(#name),     \
+		._string_name = #name,                  \
+		._size_desc = sizeof(desc),             \
+		._desc = desc,                          \
+		._size_arg_types = sizeof(arg_types),   \
+		._arg_types = arg_types,                \
+		._size_arg_names = sizeof(arg_names),   \
+		._arg_names = arg_names                 \
+	},
+	MIPE_HEADER_TP_LIST
+#undef TP_DESC
+};
+
+#undef MIPE_HEADER_BLOB_VAR_NAME
+#undef MIPE_HEADER_TP_LIST
+#undef MIPE_HEADER_TP_LIST_COUNT
+#undef MIPE_HEADER_PKT_CLASS
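
Following the parameter list documented at the top of this header, a hedged example of how a translation unit might instantiate a tracepoint declaration BLOB. KBASE_EXAMPLE_TP and the variable name are made up for illustration; the real driver defines its tracepoint IDs elsewhere:

/* Hypothetical tracepoint ID; real IDs come from the driver's tracepoint
 * message-ID enumeration.
 */
#define KBASE_EXAMPLE_TP 0

#define MIPE_HEADER_BLOB_VAR_NAME __example_obj_desc_header
#define MIPE_HEADER_TP_LIST \
	TP_DESC(KBASE_EXAMPLE_TP, "Some description", "@II", "first_arg,second_arg")
#define MIPE_HEADER_TP_LIST_COUNT 1
#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ

#include "mali_kbase_mipe_gen_header.h"

/* The header consumes and #undefs the four parameters, leaving a packed,
 * statically initialized blob named __example_obj_desc_header.
 */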
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h b/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h
new file mode 100644
index 0000000..1a0b8b4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mipe_proto.h
@@ -0,0 +1,113 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_MIPE_PROTO_H)
+#define _KBASE_MIPE_PROTO_H
+
+#define _BITFIELD_MASK_FIELD(pos, len) \
+	(((1u << len) - 1) << pos)
+
+#define _BITFIELD_SET_FIELD(pos, len, value) \
+	(_BITFIELD_MASK_FIELD(pos, len) & (((u32) value) << pos))
+
+#define BITFIELD_SET(field_name, value) \
+	_BITFIELD_SET_FIELD(field_name ## _POS, field_name ## _LEN, value)
+
+/* The version of swtrace protocol used in timeline stream. */
+#define SWTRACE_VERSION    3
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation.
+ */
+#define PACKET_STREAMID_POS  0
+#define PACKET_STREAMID_LEN  8
+#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN     8
+#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN      3
+#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN     7
+#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN    6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation.
+ */
+#define PACKET_LENGTH_POS    0
+#define PACKET_LENGTH_LEN    24
+#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN    1
+#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN     7
+
+/* First word of a MIPE packet */
+#define MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id) \
+	(0                                          \
+	| BITFIELD_SET(PACKET_FAMILY,   pkt_family) \
+	| BITFIELD_SET(PACKET_CLASS,    pkt_class)  \
+	| BITFIELD_SET(PACKET_TYPE,     pkt_type)   \
+	| BITFIELD_SET(PACKET_STREAMID, stream_id))
+
+/* Second word of a MIPE packet */
+#define MIPE_PACKET_HEADER_W1(packet_length, seqbit) \
+	(0                                           \
+	| BITFIELD_SET(PACKET_LENGTH, packet_length) \
+	| BITFIELD_SET(PACKET_SEQBIT, seqbit))
+
+/* The number of bytes reserved for packet header.
+ * This value must be defined according to the MIPE documentation.
+ */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * This value must be defined according to the MIPE documentation.
+ */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation.
+ */
+enum tl_packet_family {
+	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+	TL_PACKET_FAMILY_TL = 1,   /* timeline packets */
+	TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation.
+ */
+enum tl_packet_class {
+	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation.
+ */
+enum tl_packet_type {
+	TL_PACKET_TYPE_HEADER = 0,  /* stream's header/directory */
+	TL_PACKET_TYPE_BODY = 1,    /* stream's body */
+	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+#endif /* _KBASE_MIPE_PROTO_H */
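
A small standalone check of the word layout defined above, computing the first header word that mali_kbase_mipe_gen_header.h would emit assuming class OBJ (family TL, type HEADER, stream 1). The bit positions are copied from the macros in this file:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t w0 = (1u << 26)  /* PACKET_FAMILY   = TL_PACKET_FAMILY_TL */
		    | (0u << 19)  /* PACKET_CLASS    = TL_PACKET_CLASS_OBJ */
		    | (0u << 16)  /* PACKET_TYPE     = TL_PACKET_TYPE_HEADER */
		    | (1u << 0);  /* PACKET_STREAMID = 1 */

	printf("w0 = 0x%08x\n", w0); /* expected 0x04000001 */
	return 0;
}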
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
index e2d2914..ccb63d0 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,10 +32,7 @@
 #include <linux/dma-mapping.h>
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-#include <mali_kbase_gator.h>
-#endif
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 #include <mali_kbase_instr_defs.h>
 #include <mali_kbase_debug.h>
 
@@ -45,8 +42,9 @@
 #include <mali_kbase_hw.h>
 #include <mali_kbase_mmu_hw.h>
 #include <mali_kbase_hwaccess_jm.h>
-#include <mali_kbase_time.h>
+#include <mali_kbase_hwaccess_time.h>
 #include <mali_kbase_mem.h>
+#include <mali_kbase_reset_gpu.h>
 
 #define KBASE_MMU_PAGE_ENTRIES 512
 
@@ -114,11 +112,13 @@
  */
 
 static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
-		struct kbase_as *as, const char *reason_str);
+		struct kbase_as *as, const char *reason_str,
+		struct kbase_fault *fault);
+
 
 static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 					struct tagged_addr *phys, size_t nr,
-					unsigned long flags);
+					unsigned long flags, int group_id);
 
 /**
  * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
@@ -188,7 +188,7 @@
 	return minimum_extra + multiple - remainder;
 }
 
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 static void kbase_gpu_mmu_handle_write_faulting_as(
 				struct kbase_device *kbdev,
 				struct kbase_as *faulting_as,
@@ -213,6 +213,7 @@
 	struct kbasep_gwt_list_element *pos;
 	struct kbase_va_region *region;
 	struct kbase_device *kbdev;
+	struct kbase_fault *fault;
 	u64 fault_pfn, pfn_offset;
 	u32 op;
 	int ret;
@@ -220,24 +221,27 @@
 
 	as_no = faulting_as->number;
 	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
-	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	fault = &faulting_as->pf_data;
+	fault_pfn = fault->addr >> PAGE_SHIFT;
 
 	kbase_gpu_vm_lock(kctx);
 
 	/* Find region and check if it should be writable. */
 	region = kbase_region_tracker_find_region_enclosing_address(kctx,
-			faulting_as->fault_addr);
+			fault->addr);
 	if (kbase_is_region_invalid_or_free(region)) {
 		kbase_gpu_vm_unlock(kctx);
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Memory is not mapped on the GPU");
+				"Memory is not mapped on the GPU",
+				&faulting_as->pf_data);
 		return;
 	}
 
 	if (!(region->flags & KBASE_REG_GPU_WR)) {
 		kbase_gpu_vm_unlock(kctx);
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Region does not have write permissions");
+				"Region does not have write permissions",
+				&faulting_as->pf_data);
 		return;
 	}
 
@@ -245,7 +249,7 @@
 	 * for job dumping if write tracking is enabled.
 	 */
 	if (kctx->gwt_enabled) {
-		u64 page_addr = faulting_as->fault_addr & PAGE_MASK;
+		u64 page_addr = fault->addr & PAGE_MASK;
 		bool found = false;
 		/* Check if this write was already handled. */
 		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
@@ -272,7 +276,7 @@
 	/* Now make this faulting page writable to GPU. */
 	ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
 				&kbase_get_gpu_phy_pages(region)[pfn_offset],
-				1, region->flags);
+				1, region->flags, region->gpu_alloc->group_id);
 
 	/* flush L2 and unlock the VA (resumes the MMU) */
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
@@ -289,26 +293,24 @@
 static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
 			struct kbase_as	*faulting_as)
 {
-	u32 fault_status;
+	struct kbase_fault *fault = &faulting_as->pf_data;
 
-	fault_status = faulting_as->fault_status;
-
-	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	switch (fault->status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
 	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
 	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
 		kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
 		break;
 	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Execute Permission fault");
+				"Execute Permission fault", fault);
 		break;
 	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Read Permission fault");
+				"Read Permission fault", fault);
 		break;
 	default:
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Unknown Permission fault");
+				"Unknown Permission fault", fault);
 		break;
 	}
 }
@@ -355,13 +357,20 @@
 	bool alloc_failed = false;
 	size_t pages_still_required;
 
+	if (WARN_ON(region->gpu_alloc->group_id >=
+		MEMORY_GROUP_MANAGER_NR_GROUPS)) {
+		/* Do not try to grow the memory pool */
+		*pages_to_grow = 0;
+		return false;
+	}
+
 #ifdef CONFIG_MALI_2MB_ALLOC
 	if (new_pages >= (SZ_2M / SZ_4K)) {
-		root_pool = &kctx->lp_mem_pool;
+		root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
 		*grow_2mb_pool = true;
 	} else {
 #endif
-		root_pool = &kctx->mem_pool;
+		root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
 		*grow_2mb_pool = false;
 #ifdef CONFIG_MALI_2MB_ALLOC
 	}
@@ -522,6 +531,7 @@
 	struct kbase_context *kctx;
 	struct kbase_device *kbdev;
 	struct kbase_va_region *region;
+	struct kbase_fault *fault;
 	int err;
 	bool grown = false;
 	int pages_to_grow;
@@ -530,7 +540,8 @@
 	int i;
 
 	faulting_as = container_of(data, struct kbase_as, work_pagefault);
-	fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+	fault = &faulting_as->pf_data;
+	fault_pfn = fault->addr >> PAGE_SHIFT;
 	as_no = faulting_as->number;
 
 	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
@@ -546,16 +557,16 @@
 
 	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
 
-	if (unlikely(faulting_as->protected_mode)) {
+	if (unlikely(fault->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Protected mode fault");
+				"Protected mode fault", fault);
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_PAGE);
 
 		goto fault_done;
 	}
 
-	fault_status = faulting_as->fault_status;
+	fault_status = fault->status;
 	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
@@ -563,7 +574,7 @@
 		break;
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 		/* If GWT was ever enabled then we need to handle
 		 * write fault pages even if the feature was disabled later.
 		 */
@@ -575,12 +586,12 @@
 #endif
 
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Permission failure");
+				"Permission failure", fault);
 		goto fault_done;
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Translation table bus fault");
+				"Translation table bus fault", fault);
 		goto fault_done;
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
@@ -591,24 +602,24 @@
 	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
 		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Address size fault");
+					"Address size fault", fault);
 		else
 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Unknown fault code");
+					"Unknown fault code", fault);
 		goto fault_done;
 
 	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
 		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Memory attributes fault");
+					"Memory attributes fault", fault);
 		else
 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Unknown fault code");
+					"Unknown fault code", fault);
 		goto fault_done;
 
 	default:
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Unknown fault code");
+				"Unknown fault code", fault);
 		goto fault_done;
 	}
 
@@ -618,7 +629,8 @@
 		prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
 		if (!prealloc_sas[i]) {
 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Failed pre-allocating memory for sub-allocations' metadata");
+					"Failed pre-allocating memory for sub-allocations' metadata",
+					fault);
 			goto fault_done;
 		}
 	}
@@ -630,18 +642,25 @@
 	kbase_gpu_vm_lock(kctx);
 
 	region = kbase_region_tracker_find_region_enclosing_address(kctx,
-			faulting_as->fault_addr);
+			fault->addr);
 	if (kbase_is_region_invalid_or_free(region)) {
 		kbase_gpu_vm_unlock(kctx);
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Memory is not mapped on the GPU");
+				"Memory is not mapped on the GPU", fault);
 		goto fault_done;
 	}
 
 	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
 		kbase_gpu_vm_unlock(kctx);
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"DMA-BUF is not mapped on the GPU");
+				"DMA-BUF is not mapped on the GPU", fault);
+		goto fault_done;
+	}
+
+	if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) {
+		kbase_gpu_vm_unlock(kctx);
+		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+				"Bad physical memory group ID", fault);
 		goto fault_done;
 	}
 
@@ -649,14 +668,14 @@
 			!= GROWABLE_FLAGS_REQUIRED) {
 		kbase_gpu_vm_unlock(kctx);
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Memory is not growable");
+				"Memory is not growable", fault);
 		goto fault_done;
 	}
 
 	if ((region->flags & KBASE_REG_DONT_NEED)) {
 		kbase_gpu_vm_unlock(kctx);
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Don't need memory can't be grown");
+				"Don't need memory can't be grown", fault);
 		goto fault_done;
 	}
 
@@ -667,7 +686,7 @@
 
 	if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
 		dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
-				faulting_as->fault_addr, region->start_pfn,
+				fault->addr, region->start_pfn,
 				region->start_pfn +
 				kbase_reg_current_backed_size(region));
 
@@ -743,9 +762,9 @@
 		 * us to unlock the MMU as we see fit.
 		 */
 		err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
-				region->start_pfn + pfn_offset,
-				&kbase_get_gpu_phy_pages(region)[pfn_offset],
-				new_pages, region->flags);
+			region->start_pfn + pfn_offset,
+			&kbase_get_gpu_phy_pages(region)[pfn_offset],
+			new_pages, region->flags, region->gpu_alloc->group_id);
 		if (err) {
 			kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
 			if (region->gpu_alloc != region->cpu_alloc)
@@ -754,13 +773,10 @@
 			kbase_gpu_vm_unlock(kctx);
 			/* The locked VA region will be unlocked and the cache invalidated in here */
 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Page table update failure");
+					"Page table update failure", fault);
 			goto fault_done;
 		}
-#if defined(CONFIG_MALI_GATOR_SUPPORT)
-		kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
-#endif
-		KBASE_TLSTREAM_AUX_PAGEFAULT(kctx->id, (u64)new_pages);
+		KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no, (u64)new_pages);
 
 		/* AS transaction begin */
 		mutex_lock(&kbdev->mmu_hw_mutex);
@@ -783,9 +799,8 @@
 					 KBASE_MMU_FAULT_TYPE_PAGE);
 
 		kbase_mmu_hw_do_operation(kbdev, faulting_as,
-					  faulting_as->fault_addr >> PAGE_SHIFT,
-					  new_pages,
-					  op, 1);
+				fault->addr >> PAGE_SHIFT,
+				new_pages, op, 1);
 
 		mutex_unlock(&kbdev->mmu_hw_mutex);
 		/* AS transaction end */
@@ -794,7 +809,7 @@
 		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
 					 KBASE_MMU_FAULT_TYPE_PAGE);
 
-#ifdef CONFIG_MALI_JOB_DUMP
+#ifdef CONFIG_MALI_CINSTR_GWT
 		if (kctx->gwt_enabled) {
 			/* GWT also tracks growable regions. */
 			struct kbasep_gwt_list_element *pos;
@@ -826,15 +841,24 @@
 #ifdef CONFIG_MALI_2MB_ALLOC
 			if (grow_2mb_pool) {
 				/* Round page requirement up to nearest 2 MB */
+				struct kbase_mem_pool *const lp_mem_pool =
+					&kctx->mem_pools.large[
+					region->gpu_alloc->group_id];
+
 				pages_to_grow = (pages_to_grow +
-					((1 << kctx->lp_mem_pool.order) - 1))
-						>> kctx->lp_mem_pool.order;
-				ret = kbase_mem_pool_grow(&kctx->lp_mem_pool,
-						pages_to_grow);
+					((1 << lp_mem_pool->order) - 1))
+						>> lp_mem_pool->order;
+
+				ret = kbase_mem_pool_grow(lp_mem_pool,
+					pages_to_grow);
 			} else {
 #endif
-				ret = kbase_mem_pool_grow(&kctx->mem_pool,
-						pages_to_grow);
+				struct kbase_mem_pool *const mem_pool =
+					&kctx->mem_pools.small[
+					region->gpu_alloc->group_id];
+
+				ret = kbase_mem_pool_grow(mem_pool,
+					pages_to_grow);
 #ifdef CONFIG_MALI_2MB_ALLOC
 			}
 #endif
@@ -842,7 +866,7 @@
 		if (ret < 0) {
 			/* failed to extend, handle as a normal PF */
 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-					"Page allocation failure");
+					"Page allocation failure", fault);
 		} else {
 			goto page_fault_retry;
 		}
@@ -868,7 +892,7 @@
 	int i;
 	struct page *p;
 
-	p = kbase_mem_pool_alloc(&kbdev->mem_pool);
+	p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
 	if (!p)
 		return 0;
 
@@ -883,15 +907,16 @@
 	if (mmut->kctx) {
 		int new_page_count;
 
-		new_page_count = kbase_atomic_add_pages(1,
-				&mmut->kctx->used_pages);
+		new_page_count = atomic_add_return(1,
+			&mmut->kctx->used_pages);
 		KBASE_TLSTREAM_AUX_PAGESALLOC(
-				mmut->kctx->id,
-				(u64)new_page_count);
+			kbdev,
+			mmut->kctx->id,
+			(u64)new_page_count);
 		kbase_process_page_usage_inc(mmut->kctx, 1);
 	}
 
-	kbase_atomic_add_pages(1, &kbdev->memdev.used_pages);
+	atomic_add(1, &kbdev->memdev.used_pages);
 
 	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
 		kbdev->mmu_mode->entry_invalidate(&page[i]);
@@ -902,7 +927,8 @@
 	return page_to_phys(p);
 
 alloc_free:
-	kbase_mem_pool_free(&kbdev->mem_pool, p, false);
+	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p,
+		false);
 
 	return 0;
 }
@@ -965,7 +991,7 @@
 static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
 					struct kbase_mmu_table *mmut,
 					u64 vpfn,
-					unsigned int level,
+					int level,
 					phys_addr_t *out_pgd)
 {
 	phys_addr_t pgd;
@@ -1021,7 +1047,7 @@
 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
 		unsigned int pcount = 0;
 		unsigned int left = to_vpfn - vpfn;
-		unsigned int level;
+		int level;
 		u64 *page;
 
 		if (count > left)
@@ -1074,7 +1100,7 @@
  */
 int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 					struct tagged_addr phys, size_t nr,
-					unsigned long flags)
+					unsigned long flags, int const group_id)
 {
 	phys_addr_t pgd;
 	u64 *pgd_page;
@@ -1085,13 +1111,13 @@
 	size_t recover_count = 0;
 	size_t remain = nr;
 	int err;
-	struct kbase_mmu_mode const *mmu_mode;
+	struct kbase_device *kbdev;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	/* 64-bit address range is the max */
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
 
-	mmu_mode = kctx->kbdev->mmu_mode;
+	kbdev = kctx->kbdev;
 
 	/* Early out if there is nothing to do */
 	if (nr == 0)
@@ -1116,7 +1142,7 @@
 		 * 256 pages at once (on average). Do we really care?
 		 */
 		do {
-			err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+			err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
 					vpfn, &pgd);
 			if (err != -ENOMEM)
 				break;
@@ -1124,16 +1150,18 @@
 			 * the page walk to succeed
 			 */
 			mutex_unlock(&kctx->mmu.mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
-					MIDGARD_MMU_BOTTOMLEVEL);
+			err = kbase_mem_pool_grow(
+				&kbdev->mem_pools.small[
+					kctx->mmu.group_id],
+				MIDGARD_MMU_BOTTOMLEVEL);
 			mutex_lock(&kctx->mmu.mmu_lock);
 		} while (!err);
 		if (err) {
-			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+			dev_warn(kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
 			if (recover_required) {
 				/* Invalidate the pages we have partially
 				 * completed */
-				mmu_insert_pages_failure_recovery(kctx->kbdev,
+				mmu_insert_pages_failure_recovery(kbdev,
 						&kctx->mmu,
 						recover_vpfn,
 						recover_vpfn + recover_count);
@@ -1144,11 +1172,11 @@
 		p = pfn_to_page(PFN_DOWN(pgd));
 		pgd_page = kmap(p);
 		if (!pgd_page) {
-			dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+			dev_warn(kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
 			if (recover_required) {
 				/* Invalidate the pages we have partially
 				 * completed */
-				mmu_insert_pages_failure_recovery(kctx->kbdev,
+				mmu_insert_pages_failure_recovery(kbdev,
 						&kctx->mmu,
 						recover_vpfn,
 						recover_vpfn + recover_count);
@@ -1163,15 +1191,14 @@
 			/* Fail if the current page is a valid ATE entry */
 			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
 
-			mmu_mode->entry_set_ate(&pgd_page[ofs],
-						phys, flags,
-						MIDGARD_MMU_BOTTOMLEVEL);
+			pgd_page[ofs] = kbase_mmu_create_ate(kbdev,
+				phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id);
 		}
 
 		vpfn += count;
 		remain -= count;
 
-		kbase_mmu_sync_pgd(kctx->kbdev,
+		kbase_mmu_sync_pgd(kbdev,
 				kbase_dma_addr(p) + (index * sizeof(u64)),
 				count * sizeof(u64));
 
@@ -1200,23 +1227,36 @@
 
 	tmp_pgd = kbdev->mmu_mode->pte_to_phy_addr(*pte);
 	tmp_p = phys_to_page(tmp_pgd);
-	kbase_mem_pool_free(&kbdev->mem_pool, tmp_p, false);
+	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
+		tmp_p, false);
 
 	/* If the MMU tables belong to a context then we accounted the memory
 	 * usage to that context, so decrement here.
 	 */
 	if (mmut->kctx) {
 		kbase_process_page_usage_dec(mmut->kctx, 1);
-		kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+		atomic_sub(1, &mmut->kctx->used_pages);
 	}
-	kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
+	atomic_sub(1, &kbdev->memdev.used_pages);
+}
+
+u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
+	struct tagged_addr const phy, unsigned long const flags,
+	int const level, int const group_id)
+{
+	u64 entry;
+
+	kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level);
+	return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev,
+		group_id, level, entry);
 }
 
 int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
 				    struct kbase_mmu_table *mmut,
 				    const u64 start_vpfn,
 				    struct tagged_addr *phys, size_t nr,
-				    unsigned long flags)
+				    unsigned long flags,
+				    int const group_id)
 {
 	phys_addr_t pgd;
 	u64 *pgd_page;
@@ -1242,7 +1282,7 @@
 		unsigned int vindex = insert_vpfn & 0x1FF;
 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
 		struct page *p;
-		unsigned int cur_level;
+		int cur_level;
 
 		if (count > remain)
 			count = remain;
@@ -1268,8 +1308,9 @@
 			 * the page walk to succeed
 			 */
 			mutex_unlock(&mmut->mmu_lock);
-			err = kbase_mem_pool_grow(&kbdev->mem_pool,
-					cur_level);
+			err = kbase_mem_pool_grow(
+				&kbdev->mem_pools.small[mmut->group_id],
+				cur_level);
 			mutex_lock(&mmut->mmu_lock);
 		} while (!err);
 
@@ -1301,13 +1342,13 @@
 		}
 
 		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
-			unsigned int level_index = (insert_vpfn >> 9) & 0x1FF;
+			int level_index = (insert_vpfn >> 9) & 0x1FF;
 			u64 *target = &pgd_page[level_index];
 
 			if (mmu_mode->pte_is_valid(*target, cur_level))
 				cleanup_empty_pte(kbdev, mmut, target);
-			mmu_mode->entry_set_ate(target, *phys, flags,
-						cur_level);
+			*target = kbase_mmu_create_ate(kbdev, *phys, flags,
+				cur_level, group_id);
 		} else {
 			for (i = 0; i < count; i++) {
 				unsigned int ofs = vindex + i;
@@ -1322,8 +1363,8 @@
 				 */
 				WARN_ON((*target & 1UL) != 0);
 
-				kbdev->mmu_mode->entry_set_ate(target,
-						phys[i], flags, cur_level);
+				*target = kbase_mmu_create_ate(kbdev,
+					phys[i], flags, cur_level, group_id);
 			}
 		}
 
@@ -1352,12 +1393,12 @@
 int kbase_mmu_insert_pages(struct kbase_device *kbdev,
 		struct kbase_mmu_table *mmut, u64 vpfn,
 		struct tagged_addr *phys, size_t nr,
-		unsigned long flags, int as_nr)
+		unsigned long flags, int as_nr, int const group_id)
 {
 	int err;
 
 	err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
-			phys, nr, flags);
+			phys, nr, flags, group_id);
 
 	if (mmut->kctx)
 		kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false);
@@ -1399,7 +1440,6 @@
 	err = kbase_mmu_hw_do_operation(kbdev,
 				&kbdev->as[kctx->as_nr],
 				vpfn, nr, op, 0);
-#if KBASE_GPU_RESET_EN
 	if (err) {
 		/* Flush failed to complete, assume the
 		 * GPU has hung and perform a reset to
@@ -1409,7 +1449,6 @@
 		if (kbase_prepare_to_reset_gpu_locked(kbdev))
 			kbase_reset_gpu_locked(kbdev);
 	}
-#endif /* KBASE_GPU_RESET_EN */
 
 #ifndef CONFIG_MALI_NO_MALI
 	/*
@@ -1418,7 +1457,7 @@
 	 * request will pick it up.
 	 */
 	if ((!err) && sync &&
-			kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367))
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
 		atomic_set(&kctx->drain_pending, 1);
 #endif /* !CONFIG_MALI_NO_MALI */
 }
@@ -1449,7 +1488,6 @@
 	err = kbase_mmu_hw_do_operation(kbdev,
 			as, vpfn, nr, op, 0);
 
-#if KBASE_GPU_RESET_EN
 	if (err) {
 		/* Flush failed to complete, assume the GPU has hung and
 		 * perform a reset to recover
@@ -1459,7 +1497,6 @@
 		if (kbase_prepare_to_reset_gpu(kbdev))
 			kbase_reset_gpu(kbdev);
 	}
-#endif /* KBASE_GPU_RESET_EN */
 
 	mutex_unlock(&kbdev->mmu_hw_mutex);
 	/* AS transaction end */
@@ -1600,7 +1637,7 @@
 		unsigned int index = vpfn & 0x1FF;
 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
 		unsigned int pcount;
-		unsigned int level;
+		int level;
 		u64 *page;
 
 		if (count > nr)
@@ -1716,15 +1753,17 @@
  *         current mappings
  * @nr:    Number of pages to update
  * @flags: Flags
+ * @group_id: The physical memory group in which the page was allocated.
+ *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
  */
 static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
 					struct tagged_addr *phys, size_t nr,
-					unsigned long flags)
+					unsigned long flags, int const group_id)
 {
 	phys_addr_t pgd;
 	u64 *pgd_page;
-	struct kbase_mmu_mode const *mmu_mode;
 	int err;
+	struct kbase_device *kbdev;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
@@ -1735,7 +1774,7 @@
 
 	mutex_lock(&kctx->mmu.mmu_lock);
 
-	mmu_mode = kctx->kbdev->mmu_mode;
+	kbdev = kctx->kbdev;
 
 	while (nr) {
 		unsigned int i;
@@ -1747,7 +1786,7 @@
 			count = nr;
 
 		do {
-			err = mmu_get_bottom_pgd(kctx->kbdev, &kctx->mmu,
+			err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
 					vpfn, &pgd);
 			if (err != -ENOMEM)
 				break;
@@ -1755,12 +1794,14 @@
 			 * the page walk to succeed
 			 */
 			mutex_unlock(&kctx->mmu.mmu_lock);
-			err = kbase_mem_pool_grow(&kctx->kbdev->mem_pool,
-					MIDGARD_MMU_BOTTOMLEVEL);
+			err = kbase_mem_pool_grow(
+				&kbdev->mem_pools.small[
+					kctx->mmu.group_id],
+				MIDGARD_MMU_BOTTOMLEVEL);
 			mutex_lock(&kctx->mmu.mmu_lock);
 		} while (!err);
 		if (err) {
-			dev_warn(kctx->kbdev->dev,
+			dev_warn(kbdev->dev,
 				 "mmu_get_bottom_pgd failure\n");
 			goto fail_unlock;
 		}
@@ -1768,20 +1809,21 @@
 		p = pfn_to_page(PFN_DOWN(pgd));
 		pgd_page = kmap(p);
 		if (!pgd_page) {
-			dev_warn(kctx->kbdev->dev, "kmap failure\n");
+			dev_warn(kbdev->dev, "kmap failure\n");
 			err = -ENOMEM;
 			goto fail_unlock;
 		}
 
 		for (i = 0; i < count; i++)
-			mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
-						flags, MIDGARD_MMU_BOTTOMLEVEL);
+			pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
+				phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
+				group_id);
 
 		phys += count;
 		vpfn += count;
 		nr -= count;
 
-		kbase_mmu_sync_pgd(kctx->kbdev,
+		kbase_mmu_sync_pgd(kbdev,
 				kbase_dma_addr(p) + (index * sizeof(u64)),
 				count * sizeof(u64));
 
@@ -1798,11 +1840,12 @@
 
 int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
 			   struct tagged_addr *phys, size_t nr,
-			   unsigned long flags)
+			   unsigned long flags, int const group_id)
 {
 	int err;
 
-	err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags);
+	err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags,
+		group_id);
 	kbase_mmu_flush_invalidate(kctx, vpfn, nr, true);
 	return err;
 }
@@ -1845,21 +1888,30 @@
 	}
 
 	p = pfn_to_page(PFN_DOWN(pgd));
-	kbase_mem_pool_free(&kbdev->mem_pool, p, true);
-	kbase_atomic_sub_pages(1, &kbdev->memdev.used_pages);
+
+	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
+		p, true);
+
+	atomic_sub(1, &kbdev->memdev.used_pages);
 
 	/* If MMU tables belong to a context then pages will have been accounted
 	 * against it, so we must decrement the usage counts here.
 	 */
 	if (mmut->kctx) {
 		kbase_process_page_usage_dec(mmut->kctx, 1);
-		kbase_atomic_sub_pages(1, &mmut->kctx->used_pages);
+		atomic_sub(1, &mmut->kctx->used_pages);
 	}
 }
 
-int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
-		struct kbase_context *kctx)
+int kbase_mmu_init(struct kbase_device *const kbdev,
+	struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
+	int const group_id)
 {
+	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
+	    WARN_ON(group_id < 0))
+		return -EINVAL;
+
+	mmut->group_id = group_id;
 	mutex_init(&mmut->mmu_lock);
 	mmut->kctx = kctx;
 
@@ -1877,8 +1929,9 @@
 	while (!mmut->pgd) {
 		int err;
 
-		err = kbase_mem_pool_grow(&kbdev->mem_pool,
-				MIDGARD_MMU_BOTTOMLEVEL);
+		err = kbase_mem_pool_grow(
+			&kbdev->mem_pools.small[mmut->group_id],
+			MIDGARD_MMU_BOTTOMLEVEL);
 		if (err) {
 			kbase_mmu_term(kbdev, mmut);
 			return -ENOMEM;
@@ -1901,7 +1954,7 @@
 		mutex_unlock(&mmut->mmu_lock);
 
 		if (mmut->kctx)
-			KBASE_TLSTREAM_AUX_PAGESALLOC(mmut->kctx->id, 0);
+			KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
 	}
 
 	kfree(mmut->mmu_teardown_pages);
@@ -1915,16 +1968,18 @@
 	int i;
 	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
 	size_t dump_size;
+	struct kbase_device *kbdev;
 	struct kbase_mmu_mode const *mmu_mode;
 
 	KBASE_DEBUG_ASSERT(NULL != kctx);
 	lockdep_assert_held(&kctx->mmu.mmu_lock);
 
-	mmu_mode = kctx->kbdev->mmu_mode;
+	kbdev = kctx->kbdev;
+	mmu_mode = kbdev->mmu_mode;
 
 	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
 	if (!pgd_page) {
-		dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+		dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
 		return 0;
 	}
 
@@ -1991,23 +2046,20 @@
 		char *mmu_dump_buffer;
 		u64 config[3];
 		size_t dump_size, size = 0;
+		struct kbase_mmu_setup as_setup;
 
 		buffer = (char *)kaddr;
 		mmu_dump_buffer = buffer;
 
-		if (kctx->api_version >= KBASE_API_VERSION(8, 4)) {
-			struct kbase_mmu_setup as_setup;
-
-			kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
-					&as_setup);
-			config[0] = as_setup.transtab;
-			config[1] = as_setup.memattr;
-			config[2] = as_setup.transcfg;
-			memcpy(buffer, &config, sizeof(config));
-			mmu_dump_buffer += sizeof(config);
-			size_left -= sizeof(config);
-			size += sizeof(config);
-		}
+		kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
+				&as_setup);
+		config[0] = as_setup.transtab;
+		config[1] = as_setup.memattr;
+		config[2] = as_setup.transcfg;
+		memcpy(buffer, &config, sizeof(config));
+		mmu_dump_buffer += sizeof(config);
+		size_left -= sizeof(config);
+		size += sizeof(config);
 
 		dump_size = kbasep_mmu_dump_level(kctx,
 				kctx->mmu.pgd,
@@ -2048,18 +2100,22 @@
 	int as_no;
 	struct kbase_context *kctx;
 	struct kbase_device *kbdev;
-#if KBASE_GPU_RESET_EN
+	struct kbase_fault *fault;
 	bool reset_status = false;
-#endif /* KBASE_GPU_RESET_EN */
 
 	faulting_as = container_of(data, struct kbase_as, work_busfault);
+	fault = &faulting_as->bf_data;
+
+	/* Ensure that any pending page fault worker has completed */
+	flush_work(&faulting_as->work_pagefault);
 
 	as_no = faulting_as->number;
 
 	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
 
-	/* Grab the context that was already refcounted in kbase_mmu_interrupt().
-	 * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+	/* Grab the context, already refcounted in kbase_mmu_interrupt() on
+	 * flagging of the bus-fault. Therefore, it cannot be scheduled out of
+	 * this AS until we explicitly release it
 	 */
 	kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
 	if (WARN_ON(!kctx)) {
@@ -2067,9 +2123,9 @@
 		return;
 	}
 
-	if (unlikely(faulting_as->protected_mode)) {
+	if (unlikely(fault->protected_mode)) {
 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
-				"Permission failure");
+				"Permission failure", fault);
 		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
 				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
 		kbasep_js_runpool_release_ctx(kbdev, kctx);
@@ -2078,7 +2134,6 @@
 
 	}
 
-#if KBASE_GPU_RESET_EN
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
 		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
 		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
@@ -2087,7 +2142,6 @@
 		dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
 		reset_status = kbase_prepare_to_reset_gpu(kbdev);
 	}
-#endif /* KBASE_GPU_RESET_EN */
 	/* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
 	if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
 		unsigned long flags;
@@ -2112,10 +2166,8 @@
 		kbase_pm_context_idle(kbdev);
 	}
 
-#if KBASE_GPU_RESET_EN
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
 		kbase_reset_gpu(kbdev);
-#endif /* KBASE_GPU_RESET_EN */
 
 	kbasep_js_runpool_release_ctx(kbdev, kctx);
 
@@ -2311,11 +2363,13 @@
 	}
 }
 
+
 /**
  * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
  */
 static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
-		struct kbase_as *as, const char *reason_str)
+		struct kbase_as *as, const char *reason_str,
+		struct kbase_fault *fault)
 {
 	unsigned long flags;
 	int exception_type;
@@ -2325,9 +2379,7 @@
 	struct kbase_device *kbdev;
 	struct kbasep_js_device_data *js_devdata;
 
-#if KBASE_GPU_RESET_EN
 	bool reset_status = false;
-#endif
 
 	as_no = as->number;
 	kbdev = kctx->kbdev;
@@ -2337,9 +2389,9 @@
 	KBASE_DEBUG_ASSERT(atomic_read(&kctx->refcount) > 0);
 
 	/* decode the fault status */
-	exception_type = as->fault_status & 0xFF;
-	access_type = (as->fault_status >> 8) & 0x3;
-	source_id = (as->fault_status >> 16);
+	exception_type = fault->status & 0xFF;
+	access_type = (fault->status >> 8) & 0x3;
+	source_id = (fault->status >> 16);
 
 	/* terminal fault, print info about the fault */
 	dev_err(kbdev->dev,
@@ -2351,12 +2403,12 @@
 		"access type 0x%X: %s\n"
 		"source id 0x%X\n"
 		"pid: %d\n",
-		as_no, as->fault_addr,
+		as_no, fault->addr,
 		reason_str,
-		as->fault_status,
-		(as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+		fault->status,
+		(fault->status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
 		exception_type, kbase_exception_name(kbdev, exception_type),
-		access_type, access_type_name(kbdev, as->fault_status),
+		access_type, access_type_name(kbdev, fault->status),
 		source_id,
 		kctx->pid);
 
@@ -2364,11 +2416,9 @@
 	if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
 			(kbdev->hwcnt.backend.state ==
 						KBASE_INSTR_STATE_DUMPING)) {
-		unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
-
-		if ((as->fault_addr >= kbdev->hwcnt.addr) &&
-				(as->fault_addr < (kbdev->hwcnt.addr +
-						(num_core_groups * 2048))))
+		if ((fault->addr >= kbdev->hwcnt.addr) &&
+				(fault->addr < (kbdev->hwcnt.addr +
+					kbdev->hwcnt.addr_bytes)))
 			kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
 	}
 
@@ -2376,14 +2426,14 @@
 	 * out/rescheduled - this will occur on releasing the context's refcount */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbasep_js_clear_submit_allowed(js_devdata, kctx);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	/* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
 	 * context can appear in the job slots from this point on */
-	kbase_backend_jm_kill_jobs_from_kctx(kctx);
+	kbase_backend_jm_kill_running_jobs_from_kctx(kctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
 	/* AS transaction begin */
 	mutex_lock(&kbdev->mmu_hw_mutex);
-#if KBASE_GPU_RESET_EN
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
 		/* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
 		 * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
@@ -2392,13 +2442,14 @@
 		dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
 		reset_status = kbase_prepare_to_reset_gpu(kbdev);
 	}
-#endif /* KBASE_GPU_RESET_EN */
 	/* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
 	kbase_mmu_disable(kctx);
 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
 
 	mutex_unlock(&kbdev->mmu_hw_mutex);
+
+
 	/* AS transaction end */
 	/* Clear down the fault */
 	kbase_mmu_hw_clear_fault(kbdev, as,
@@ -2406,10 +2457,8 @@
 	kbase_mmu_hw_enable_fault(kbdev, as,
 			KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
 
-#if KBASE_GPU_RESET_EN
 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
 		kbase_reset_gpu(kbdev);
-#endif /* KBASE_GPU_RESET_EN */
 }
 
 void kbasep_as_do_poke(struct work_struct *work)
@@ -2568,35 +2617,35 @@
 	katom->poking = 0;
 }
 
-void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+		struct kbase_context *kctx, struct kbase_as *as,
+		struct kbase_fault *fault)
 {
-	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
-
 	lockdep_assert_held(&kbdev->hwaccess_lock);
 
 	if (!kctx) {
-		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
-				 kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
-				 as->number, as->fault_addr);
+		dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW Design Error?\n",
+				kbase_as_has_bus_fault(as, fault) ?
+						"Bus error" : "Page fault",
+				as->number, fault->addr);
 
 		/* Since no ctx was found, the MMU must be disabled. */
 		WARN_ON(as->current_setup.transtab);
 
-		if (kbase_as_has_bus_fault(as)) {
+		if (kbase_as_has_bus_fault(as, fault)) {
 			kbase_mmu_hw_clear_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
 			kbase_mmu_hw_enable_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
-		} else if (kbase_as_has_page_fault(as)) {
+		} else if (kbase_as_has_page_fault(as, fault)) {
 			kbase_mmu_hw_clear_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
 			kbase_mmu_hw_enable_fault(kbdev, as,
 					KBASE_MMU_FAULT_TYPE_PAGE_UNEXPECTED);
 		}
 
-#if KBASE_GPU_RESET_EN
-		if (kbase_as_has_bus_fault(as) &&
-				kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+		if (kbase_as_has_bus_fault(as, fault) &&
+			    kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
 			bool reset_status;
 			/*
 			 * Reset the GPU, like in bus_fault_worker, in case an
@@ -2608,12 +2657,13 @@
 			if (reset_status)
 				kbase_reset_gpu_locked(kbdev);
 		}
-#endif /* KBASE_GPU_RESET_EN */
 
 		return;
 	}
 
-	if (kbase_as_has_bus_fault(as)) {
+	if (kbase_as_has_bus_fault(as, fault)) {
+		struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
 		/*
 		 * hw counters dumping in progress, signal the
 		 * other thread that it failed
@@ -2634,11 +2684,11 @@
 		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_AARCH64_MMU))
 			dev_warn(kbdev->dev,
 					"Bus error in AS%d at VA=0x%016llx, IPA=0x%016llx\n",
-					as->number, as->fault_addr,
-					as->fault_extra_addr);
+					as->number, fault->addr,
+					fault->extra_addr);
 		else
 			dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
-					as->number, as->fault_addr);
+					as->number, fault->addr);
 
 		/*
 		 * We need to switch to UNMAPPED mode - but we do this in a
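The hunks above replace direct reads of as->fault_addr / as->fault_status with a dedicated fault descriptor. The struct itself is introduced elsewhere in this series; the sketch below only gathers the members these handlers dereference and is an inferred layout, not the authoritative definition.

	/* Inferred sketch, based solely on the accesses in the hunks above;
	 * the real r20 definition may differ in naming and ordering.
	 */
	struct kbase_fault {
		u64 addr;		/* faulting GPU virtual address */
		u64 extra_addr;		/* secondary address (e.g. IPA on AArch64 MMUs) */
		u32 status;		/* raw fault status register value */
		bool protected_mode;	/* fault raised while in protected mode */
	};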
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
index 70d5f2b..f49a1d4 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015,2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,8 +35,8 @@
  * be provided.
  */
 
-#ifndef _MALI_KBASE_MMU_HW_H_
-#define _MALI_KBASE_MMU_HW_H_
+#ifndef _KBASE_MMU_HW_H_
+#define _KBASE_MMU_HW_H_
 
 /* Forward declarations */
 struct kbase_device;
@@ -121,4 +121,4 @@
 /** @} *//* end group mali_kbase_mmu_hw */
 /** @} *//* end group base_kbase_api */
 
-#endif	/* _MALI_KBASE_MMU_HW_H_ */
+#endif	/* _KBASE_MMU_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
index 38ca456..7b9cc0c 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_aarch64.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2014, 2016-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2014, 2016-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -130,7 +130,7 @@
 	return entry & ~0xFFF;
 }
 
-static int ate_is_valid(u64 ate, unsigned int level)
+static int ate_is_valid(u64 ate, int const level)
 {
 	if (level == MIDGARD_MMU_BOTTOMLEVEL)
 		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L3);
@@ -138,7 +138,7 @@
 		return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE_L02);
 }
 
-static int pte_is_valid(u64 pte, unsigned int level)
+static int pte_is_valid(u64 pte, int const level)
 {
 	/* PTEs cannot exist at the bottom level */
 	if (level == MIDGARD_MMU_BOTTOMLEVEL)
@@ -181,7 +181,7 @@
 static void entry_set_ate(u64 *entry,
 		struct tagged_addr phy,
 		unsigned long flags,
-		unsigned int level)
+		int const level)
 {
 	if (level == MIDGARD_MMU_BOTTOMLEVEL)
 		page_table_entry_set(entry, as_phys_addr_t(phy) |
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
index f6bdf91..7ec90cf 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -130,12 +130,12 @@
 	return entry & ~0xFFF;
 }
 
-static int ate_is_valid(u64 ate, unsigned int level)
+static int ate_is_valid(u64 ate, int const level)
 {
 	return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
 }
 
-static int pte_is_valid(u64 pte, unsigned int level)
+static int pte_is_valid(u64 pte, int const level)
 {
 	return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
 }
@@ -179,7 +179,7 @@
 static void entry_set_ate(u64 *entry,
 		struct tagged_addr phy,
 		unsigned long flags,
-		unsigned int level)
+		int const level)
 {
 	page_table_entry_set(entry, as_phys_addr_t(phy) | get_mmu_flags(flags) |
 			     ENTRY_IS_ATE);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c b/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c
new file mode 100644
index 0000000..38ae46e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_native_mgm.c
@@ -0,0 +1,153 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/memory_group_manager.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_native_mgm.h>
+
+/**
+ * kbase_native_mgm_alloc - Native physical memory allocation method
+ *
+ * @mgm_dev:  The memory group manager the request is being made through.
+ * @group_id: A physical memory group ID, which must be valid but is not used.
+ *            Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @gfp_mask: Bitmask of Get Free Page flags affecting allocator behavior.
+ * @order:    Page order for physical page size (order=0 means 4 KiB,
+ *            order=9 means 2 MiB).
+ *
+ * Delegates all memory allocation requests to the kernel's alloc_pages
+ * function.
+ *
+ * Return: Pointer to allocated page, or NULL if allocation failed.
+ */
+static struct page *kbase_native_mgm_alloc(
+	struct memory_group_manager_device *mgm_dev, int group_id,
+	gfp_t gfp_mask, unsigned int order)
+{
+	/*
+	 * Check that the base and the mgm defines, from separate header files,
+	 * for the max number of memory groups are compatible.
+	 */
+	BUILD_BUG_ON(BASE_MEM_GROUP_COUNT != MEMORY_GROUP_MANAGER_NR_GROUPS);
+	/*
+	 * Check that the mask used for storing the memory group ID is big
+	 * enough for the largest possible memory group ID.
+	 */
+	BUILD_BUG_ON((BASEP_CONTEXT_MMU_GROUP_ID_MASK
+				>> BASEP_CONTEXT_MMU_GROUP_ID_SHIFT)
+			< (BASE_MEM_GROUP_COUNT - 1));
+
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+
+	return alloc_pages(gfp_mask, order);
+}
+
+/**
+ * kbase_native_mgm_free - Native physical memory freeing method
+ *
+ * @mgm_dev:  The memory group manager the request is being made through.
+ * @group_id: A physical memory group ID, which must be valid but is not used.
+ *            Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @page:     Address of the struct associated with a page of physical
+ *            memory that was allocated by calling kbase_native_mgm_alloc
+ *            with the same argument values.
+ * @order:    Page order for physical page size (order=0 means 4 KiB,
+ *            order=9 means 2 MiB).
+ *
+ * Delegates all memory freeing requests to the kernel's __free_pages function.
+ */
+static void kbase_native_mgm_free(struct memory_group_manager_device *mgm_dev,
+	int group_id, struct page *page, unsigned int order)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+
+	__free_pages(page, order);
+}
+
+/**
+ * kbase_native_mgm_vmf_insert_pfn_prot - Native method to map a page on the CPU
+ *
+ * @mgm_dev:  The memory group manager the request is being made through.
+ * @group_id: A physical memory group ID, which must be valid but is not used.
+ *            Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @vma:      The virtual memory area to insert the page into.
+ * @addr:     An address contained in @vma to assign to the inserted page.
+ * @pfn:      The kernel Page Frame Number to insert at @addr in @vma.
+ * @pgprot:   Protection flags for the inserted page.
+ *
+ * Called from a CPU virtual memory page fault handler. Delegates all memory
+ * mapping requests to the kernel's vmf_insert_pfn_prot function.
+ *
+ * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page table
+ *         entry was successfully installed.
+ */
+static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, pgprot_t pgprot)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+
+	return vmf_insert_pfn_prot(vma, addr, pfn, pgprot);
+}
+
+/**
+ * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table
+ *                                   entry
+ *
+ * @mgm_dev:   The memory group manager the request is being made through.
+ * @group_id:  A physical memory group ID, which must be valid but is not used.
+ *             Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+ * @mmu_level: The level of the MMU page table where the page is getting mapped.
+ * @pte:       The prepared page table entry.
+ *
+ * This function simply returns the @pte without modification.
+ *
+ * Return: A GPU page table entry to be stored in a page table.
+ */
+static u64
+kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev,
+			      int group_id, int mmu_level, u64 pte)
+{
+	CSTD_UNUSED(mgm_dev);
+	CSTD_UNUSED(group_id);
+	CSTD_UNUSED(mmu_level);
+
+	return pte;
+}
+
+struct memory_group_manager_device kbase_native_mgm_dev = {
+	.ops = {
+		.mgm_alloc_page = kbase_native_mgm_alloc,
+		.mgm_free_page = kbase_native_mgm_free,
+		.mgm_get_import_memory_id = NULL,
+		.mgm_vmf_insert_pfn_prot = kbase_native_mgm_vmf_insert_pfn_prot,
+		.mgm_update_gpu_pte = kbase_native_mgm_update_gpu_pte,
+	},
+	.data = NULL
+};
diff --git a/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h b/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h
new file mode 100644
index 0000000..431b1f4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_native_mgm.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_NATIVE_MGM_H_
+#define _KBASE_NATIVE_MGM_H_
+
+#include <linux/memory_group_manager.h>
+
+/**
+ * kbase_native_mgm_dev - Native memory group manager device
+ *
+ * An implementation of the memory group manager interface that is intended for
+ * internal use when no platform-specific memory group manager is available.
+ *
+ * It ignores the specified group ID and delegates to the kernel's physical
+ * memory allocation and freeing functions.
+ */
+extern struct memory_group_manager_device kbase_native_mgm_dev;
+
+#endif /* _KBASE_NATIVE_MGM_H_ */
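As the comment above notes, kbase_native_mgm_dev is only the fallback used when no platform-specific memory group manager is registered. A platform implementation fills in the same memory_group_manager_ops but may act on the group ID. A minimal sketch follows; the function names and the group-1 policy are hypothetical, chosen purely for illustration.

	#include <linux/gfp.h>
	#include <linux/memory_group_manager.h>

	/* Hypothetical platform MGM that honours the group ID. */
	static struct page *example_mgm_alloc_page(
		struct memory_group_manager_device *mgm_dev, int group_id,
		gfp_t gfp_mask, unsigned int order)
	{
		if (group_id == 1)	/* made-up policy: 32-bit addressable memory */
			gfp_mask |= __GFP_DMA32;
		return alloc_pages(gfp_mask, order);
	}

	static void example_mgm_free_page(
		struct memory_group_manager_device *mgm_dev, int group_id,
		struct page *page, unsigned int order)
	{
		__free_pages(page, order);
	}

	static struct memory_group_manager_device example_mgm_dev = {
		.ops = {
			.mgm_alloc_page = example_mgm_alloc_page,
			.mgm_free_page = example_mgm_free_page,
			/* the remaining ops can delegate to the kernel helpers,
			 * exactly as mali_kbase_native_mgm.c does above */
		},
	};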
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c
index d5b8c77..5699eb8 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_pm.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -30,6 +30,7 @@
 #include <mali_kbase.h>
 #include <mali_midg_regmap.h>
 #include <mali_kbase_vinstr.h>
+#include <mali_kbase_hwcnt_context.h>
 
 #include <mali_kbase_pm.h>
 
@@ -83,10 +84,6 @@
 		 * the policy */
 		kbase_hwaccess_pm_gpu_active(kbdev);
 	}
-#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
-	if (kbdev->ipa.gpu_active_callback)
-		kbdev->ipa.gpu_active_callback(kbdev->ipa.model_data);
-#endif
 
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
@@ -118,25 +115,11 @@
 
 		/* Wake up anyone waiting for this to become 0 (e.g. suspend). The
 		 * waiters must synchronize with us by locking the pm.lock after
-		 * waiting */
+		 * waiting.
+		 */
 		wake_up(&kbdev->pm.zero_active_count_wait);
 	}
 
-#if defined(CONFIG_DEVFREQ_THERMAL) && defined(CONFIG_MALI_DEVFREQ)
-	/* IPA may be using vinstr, in which case there may be one PM reference
-	 * still held when all other contexts have left the GPU. Inform IPA that
-	 * the GPU is now idle so that vinstr can drop it's reference.
-	 *
-	 * If the GPU was only briefly active then it might have gone idle
-	 * before vinstr has taken a PM reference, meaning that active_count is
-	 * zero. We still need to inform IPA in this case, so that vinstr can
-	 * drop the PM reference and avoid keeping the GPU powered
-	 * unnecessarily.
-	 */
-	if (c <= 1 && kbdev->ipa.gpu_idle_callback)
-		kbdev->ipa.gpu_idle_callback(kbdev->ipa.model_data);
-#endif
-
 	mutex_unlock(&kbdev->pm.lock);
 	mutex_unlock(&js_devdata->runpool_mutex);
 }
@@ -147,10 +130,16 @@
 {
 	KBASE_DEBUG_ASSERT(kbdev);
 
-	/* Suspend vinstr.
-	 * This call will block until vinstr is suspended. */
+	/* Suspend vinstr. This blocks until the vinstr worker and timer are
+	 * no longer running.
+	 */
 	kbase_vinstr_suspend(kbdev->vinstr_ctx);
 
+	/* Disable GPU hardware counters.
+	 * This call will block until counters are disabled.
+	 */
+	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
 	mutex_lock(&kbdev->pm.lock);
 	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
 	kbdev->pm.suspending = true;
@@ -177,6 +166,8 @@
 
 void kbase_pm_resume(struct kbase_device *kbdev)
 {
+	unsigned long flags;
+
 	/* MUST happen before any pm_context_active calls occur */
 	kbase_hwaccess_pm_resume(kbdev);
 
@@ -195,7 +186,11 @@
 	 * need it and the policy doesn't want it on */
 	kbase_pm_context_idle(kbdev);
 
-	/* Resume vinstr operation */
+	/* Re-enable GPU hardware counters */
+	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
+	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+	/* Resume vinstr */
 	kbase_vinstr_resume(kbdev->vinstr_ctx);
 }
-
diff --git a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
index 763740e..53d9427 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_regs_history_debugfs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016, 2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -111,6 +111,7 @@
 
 
 static const struct file_operations regs_history_fops = {
+	.owner = THIS_MODULE,
 	.open = &regs_history_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
diff --git a/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h b/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h
new file mode 100644
index 0000000..df72eec
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_reset_gpu.h
@@ -0,0 +1,139 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_RESET_GPU_H_
+#define _KBASE_RESET_GPU_H_
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * Caller is expected to hold the kbdev->hwaccess_lock.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *           not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true  - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ *           not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs).
+ *
+ * After this function is called the caller should call kbase_reset_gpu_wait()
+ * to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu_locked if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu (only on Job Manager GPUs).
+ * Caller is expected to hold the kbdev->hwaccess_lock.
+ *
+ * After this function is called, the caller should call kbase_reset_gpu_wait()
+ * to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_silent - Reset the GPU silently
+ * @kbdev: Device pointer
+ *
+ * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs)
+ * and don't emit messages into the kernel log while doing the reset.
+ *
+ * This function should be used in cases where we are doing a controlled reset
+ * of the GPU as part of normal processing (e.g. exiting protected mode) where
+ * the driver will have ensured the scheduler has been idled and all other
+ * users of the GPU (e.g. instrumentation) have been suspended.
+ *
+ * Return: 0 if the reset was started successfully
+ *         -EAGAIN if another reset is currently in progress
+ */
+int kbase_reset_gpu_silent(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_is_active - Reports if the GPU is being reset
+ * @kbdev: Device pointer
+ *
+ * Return: True if the GPU is in the process of being reset (or if the GPU
+ * reset failed; not applicable to Job Manager GPUs).
+ */
+bool kbase_reset_gpu_is_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_wait - Wait for a GPU reset to complete
+ * @kbdev: Device pointer
+ *
+ * This function may wait indefinitely.
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+int kbase_reset_gpu_wait(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if successful or a negative error code on failure.
+ */
+int kbase_reset_gpu_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_reset_gpu_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_register_complete_cb - Register the callback function to be
+ *                                        invoked on completion of GPU reset.
+ *
+ * @kbdev: Device pointer
+ * @complete_callback: Pointer to the callback function
+ */
+void kbase_reset_gpu_register_complete_cb(struct kbase_device *kbdev,
+			int (*complete_callback)(struct kbase_device *kbdev));
+
+#endif
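mali_kbase_reset_gpu.h collects the reset entry points that used to be guarded by KBASE_GPU_RESET_EN; the bus-fault and page-fault paths earlier in this patch now call them unconditionally. A minimal sketch of the intended calling sequence, mirroring those call sites:

	/* Sketch of the prepare/reset/wait protocol documented above. */
	static void example_recover_gpu(struct kbase_device *kbdev)
	{
		if (kbase_prepare_to_reset_gpu(kbdev)) {
			/* This thread won the race and owns the reset. */
			kbase_reset_gpu(kbdev);

			/* Optionally block until the reset has completed. */
			kbase_reset_gpu_wait(kbdev);
		}
		/* Otherwise another thread is already performing the reset. */
	}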
diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c
index 2176479..3470f58 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_smc.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,6 +27,18 @@
 
 #include <linux/compiler.h>
 
+/* __asmeq is not available on Kernel versions >= 4.20 */
+#ifndef __asmeq
+/*
+ * This is used to ensure the compiler did actually allocate the register we
+ * asked it for some inline assembly sequences.  Apparently we can't trust the
+ * compiler from one version to another so a bit of paranoia won't hurt.  This
+ * string is meant to be concatenated with the inline asm string and will
+ * cause compilation to stop on mismatch.  (for details, see gcc PR 15089)
+ */
+#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
+#endif
+
 static noinline u64 invoke_smc_fid(u64 function_id,
 		u64 arg0, u64 arg1, u64 arg2)
 {
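The __asmeq fallback above restores a macro that newer kernels no longer export; it expands to an assembler .ifnc/.err pair, so the build fails if the compiler did not place an operand in the register the asm template expects. It is string-concatenated with the inline asm, roughly as in the illustrative arm64 fragment below (the registers and operands are examples, not the body of invoke_smc_fid):

	register u64 x0 asm("x0") = function_id;
	register u64 x1 asm("x1") = arg0;

	asm volatile(
			__asmeq("%0", "x0")
			__asmeq("%1", "x1")
			"smc	#0\n"
			: "+r" (x0)
			: "r" (x1));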
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
index 0f66ac6..88773cc 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,10 +24,8 @@
 
 #include <mali_kbase.h>
 
-#if defined(CONFIG_DMA_SHARED_BUFFER)
 #include <linux/dma-buf.h>
 #include <asm/cacheflush.h>
-#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
 #if defined(CONFIG_SYNC) || defined(CONFIG_SYNC_FILE)
 #include <mali_kbase_sync.h>
 #endif
@@ -35,7 +33,7 @@
 #include <mali_base_kernel.h>
 #include <mali_kbase_hwaccess_time.h>
 #include <mali_kbase_mem_linux.h>
-#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
 #include <linux/version.h>
 #include <linux/ktime.h>
 #include <linux/pfn.h>
@@ -495,24 +493,6 @@
 		kbase_js_sched_all(katom->kctx->kbdev);
 }
 
-static inline void free_user_buffer(struct kbase_debug_copy_buffer *buffer)
-{
-	struct page **pages = buffer->extres_pages;
-	int nr_pages = buffer->nr_extres_pages;
-
-	if (pages) {
-		int i;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page *pg = pages[i];
-
-			if (pg)
-				put_page(pg);
-		}
-		kfree(pages);
-	}
-}
-
 static void kbase_debug_copy_finish(struct kbase_jd_atom *katom)
 {
 	struct kbase_debug_copy_buffer *buffers = katom->softjob_data;
@@ -535,12 +515,15 @@
 			if (pg)
 				put_page(pg);
 		}
-		kfree(buffers[i].pages);
+		if (buffers[i].is_vmalloc)
+			vfree(buffers[i].pages);
+		else
+			kfree(buffers[i].pages);
 		if (gpu_alloc) {
 			switch (gpu_alloc->type) {
 			case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
 			{
-				free_user_buffer(&buffers[i]);
+				kbase_free_user_buffer(&buffers[i]);
 				break;
 			}
 			default:
@@ -602,6 +585,11 @@
 		if (!addr)
 			continue;
 
+		if (last_page_addr < page_addr) {
+			ret = -EINVAL;
+			goto out_cleanup;
+		}
+
 		buffers[i].nr_pages = nr_pages;
 		buffers[i].offset = addr & ~PAGE_MASK;
 		if (buffers[i].offset >= PAGE_SIZE) {
@@ -610,8 +598,17 @@
 		}
 		buffers[i].size = user_buffers[i].size;
 
-		buffers[i].pages = kcalloc(nr_pages, sizeof(struct page *),
-				GFP_KERNEL);
+		if (nr_pages > (KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD /
+				sizeof(struct page *))) {
+			buffers[i].is_vmalloc = true;
+			buffers[i].pages = vzalloc(nr_pages *
+					sizeof(struct page *));
+		} else {
+			buffers[i].is_vmalloc = false;
+			buffers[i].pages = kcalloc(nr_pages,
+					sizeof(struct page *), GFP_KERNEL);
+		}
+
 		if (!buffers[i].pages) {
 			ret = -ENOMEM;
 			goto out_cleanup;
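The hunk above sizes the debug-copy page array dynamically: small arrays keep using kcalloc(), while arrays that would exceed KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD fall back to vzalloc(), with is_vmalloc recording which free path (kfree() or vfree()) kbase_debug_copy_finish() must take. On kernels that provide them, the generic kvcalloc()/kvfree() helpers express the same small-versus-large policy in one call; a hedged equivalent for comparison only, not what the driver does:

	/* Comparison sketch; assumes kvcalloc()/kvfree() are available. */
	pages = kvcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* ... pin and use the pages ... */

	kvfree(pages);	/* picks kfree() or vfree() automatically */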
@@ -622,10 +619,20 @@
 					1, /* Write */
 					buffers[i].pages);
 		if (pinned_pages < 0) {
+			/* get_user_pages_fast has failed - page array is not
+			 * valid. Don't try to release any pages.
+			 */
+			buffers[i].nr_pages = 0;
+
 			ret = pinned_pages;
 			goto out_cleanup;
 		}
 		if (pinned_pages != nr_pages) {
+			/* Adjust number of pages, so that we only attempt to
+			 * release pages in the array that we know are valid.
+			 */
+			buffers[i].nr_pages = pinned_pages;
+
 			ret = -EINVAL;
 			goto out_cleanup;
 		}
@@ -675,8 +682,18 @@
 					alloc->imported.user_buf.address,
 					nr_pages, 0,
 					buffers[i].extres_pages);
-			if (ret != nr_pages)
+			if (ret != nr_pages) {
+				/* Adjust number of pages, so that we only
+				 * attempt to release pages in the array that we
+				 * know are valid.
+				 */
+				if (ret < 0)
+					buffers[i].nr_extres_pages = 0;
+				else
+					buffers[i].nr_extres_pages = ret;
+
 				goto out_unlock;
+			}
 			ret = 0;
 			break;
 		}
@@ -755,9 +772,7 @@
 	size_t to_copy = min(extres_size, buf_data->size);
 	struct kbase_mem_phy_alloc *gpu_alloc = buf_data->gpu_alloc;
 	int ret = 0;
-#ifdef CONFIG_DMA_SHARED_BUFFER
 	size_t dma_to_copy;
-#endif
 
 	KBASE_DEBUG_ASSERT(pages != NULL);
 
@@ -788,7 +803,6 @@
 		break;
 	}
 	break;
-#ifdef CONFIG_DMA_SHARED_BUFFER
 	case KBASE_MEM_TYPE_IMPORTED_UMM: {
 		struct dma_buf *dma_buf = gpu_alloc->imported.umm.dma_buf;
 
@@ -828,7 +842,6 @@
 				DMA_FROM_DEVICE);
 		break;
 	}
-#endif
 	default:
 		ret = -EINVAL;
 	}
@@ -906,6 +919,7 @@
 	__user void *data = (__user void *)(uintptr_t) katom->jc;
 	struct base_jit_alloc_info *info;
 	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	u32 count;
 	int ret;
 	u32 i;
@@ -938,6 +952,10 @@
 		ret = kbasep_jit_alloc_validate(kctx, info);
 		if (ret)
 			goto free_info;
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom,
+			info->va_pages, info->commit_pages, info->extent,
+			info->id, info->bin_id, info->max_allocations,
+			info->flags, info->usage_id);
 	}
 
 	katom->jit_blocked = false;
@@ -996,6 +1014,7 @@
 static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
 {
 	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	struct base_jit_alloc_info *info;
 	struct kbase_va_region *reg;
 	struct kbase_vmap_struct mapping;
@@ -1082,6 +1101,8 @@
 				}
 
 				katom->event_code = BASE_JD_EVENT_MEM_GROWTH_FAILED;
+				dev_warn_ratelimited(kbdev->dev, "JIT alloc softjob failed: atom id %d\n",
+						     kbase_jd_atom_id(kctx, katom));
 				return 0;
 			}
 
@@ -1108,6 +1129,7 @@
 	}
 
 	for (i = 0, info = katom->softjob_data; i < count; i++, info++) {
+		u64 entry_mmu_flags = 0;
 		/*
 		 * Write the address of the JIT allocation to the user provided
 		 * GPU allocation.
@@ -1126,8 +1148,21 @@
 		reg = kctx->jit_alloc[info->id];
 		new_addr = reg->start_pfn << PAGE_SHIFT;
 		*ptr = new_addr;
-		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
-				katom, info->gpu_alloc_addr, new_addr);
+
+#if defined(CONFIG_MALI_VECTOR_DUMP)
+		/*
+		 * Retrieve the mmu flags for JIT allocation
+		 * only if dumping is enabled
+		 */
+		entry_mmu_flags = kbase_mmu_create_ate(kbdev,
+			(struct tagged_addr){ 0 }, reg->flags,
+			 MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id);
+#endif
+
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom,
+			info->gpu_alloc_addr, new_addr, info->flags,
+			entry_mmu_flags, info->id, info->commit_pages,
+			info->extent, info->va_pages);
 		kbase_vunmap(kctx, &mapping);
 	}
 
@@ -1161,9 +1196,11 @@
 static int kbase_jit_free_prepare(struct kbase_jd_atom *katom)
 {
 	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 	__user void *data = (__user void *)(uintptr_t) katom->jc;
 	u8 *ids;
 	u32 count = MAX(katom->nr_extres, 1);
+	u32 i;
 	int ret;
 
 	/* Sanity checks */
@@ -1198,6 +1235,8 @@
 		katom->nr_extres = 1;
 		*ids = (u8)katom->jc;
 	}
+	for (i = 0; i < count; i++)
+		KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(kbdev, katom, ids[i]);
 
 	list_add_tail(&katom->jit_node, &kctx->jit_atoms_head);
 
@@ -1274,9 +1313,12 @@
 			 * still succeed this soft job but don't try and free
 			 * the allocation.
 			 */
-			if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1)
+			if (kctx->jit_alloc[ids[j]] != (struct kbase_va_region *) -1) {
+				KBASE_TLSTREAM_TL_JIT_USEDPAGES(kctx->kbdev,
+					kctx->jit_alloc[ids[j]]->
+					gpu_alloc->nents, ids[j]);
 				kbase_jit_free(kctx, kctx->jit_alloc[ids[j]]);
-
+			}
 			kctx->jit_alloc[ids[j]] = NULL;
 		}
 	}
@@ -1425,8 +1467,10 @@
 int kbase_process_soft_job(struct kbase_jd_atom *katom)
 {
 	int ret = 0;
+	struct kbase_context *kctx = katom->kctx;
+	struct kbase_device *kbdev = kctx->kbdev;
 
-	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(katom);
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom);
 
 	switch (katom->core_req & BASE_JD_REQ_SOFT_JOB_TYPE) {
 	case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
@@ -1453,10 +1497,6 @@
 		break;
 	}
 #endif
-
-	case BASE_JD_REQ_SOFT_REPLAY:
-		ret = kbase_replay_process(katom);
-		break;
 	case BASE_JD_REQ_SOFT_EVENT_WAIT:
 		ret = kbasep_soft_event_wait(katom);
 		break;
@@ -1489,7 +1529,7 @@
 	}
 
 	/* Atom is complete */
-	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(katom);
+	KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom);
 	return ret;
 }
 
@@ -1571,8 +1611,6 @@
 #endif /* CONFIG_SYNC || CONFIG_SYNC_FILE */
 	case BASE_JD_REQ_SOFT_JIT_ALLOC:
 		return kbase_jit_allocate_prepare(katom);
-	case BASE_JD_REQ_SOFT_REPLAY:
-		break;
 	case BASE_JD_REQ_SOFT_JIT_FREE:
 		return kbase_jit_free_prepare(katom);
 	case BASE_JD_REQ_SOFT_EVENT_WAIT:
@@ -1665,12 +1703,7 @@
 		if (kbase_process_soft_job(katom_iter) == 0) {
 			kbase_finish_soft_job(katom_iter);
 			resched |= jd_done_nolock(katom_iter, NULL);
-		} else {
-			KBASE_DEBUG_ASSERT((katom_iter->core_req &
-					BASE_JD_REQ_SOFT_JOB_TYPE)
-					!= BASE_JD_REQ_SOFT_REPLAY);
 		}
-
 		mutex_unlock(&kctx->jctx.lock);
 	}
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h
index 7988a74..785b9ff 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -161,7 +161,11 @@
  */
 static inline void kbase_sync_fence_close_fd(int fd)
 {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)
 	ksys_close(fd);
+#else
+	sys_close(fd);
+#endif
 }
 
 /**
@@ -184,6 +188,16 @@
 int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
 				  struct kbase_sync_fence_info *info);
 
+#if defined(CONFIG_SYNC_FILE)
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+void kbase_sync_fence_info_get(struct fence *fence,
+			       struct kbase_sync_fence_info *info);
+#else
+void kbase_sync_fence_info_get(struct dma_fence *fence,
+			       struct kbase_sync_fence_info *info);
+#endif
+#endif
+
 /**
  * kbase_sync_status_string() - Get string matching @status
  * @status: Value of fence status.
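kbase_sync_fence_info_get() is now declared here because the sync_file.c hunk later in this patch drops its static qualifier. A hedged caller sketch, using only the kbase_sync_fence_info fields already referenced in this header and in mali_kbase_sync_file.c (the kbdev and fence variables are assumed to be held by the caller):

	struct kbase_sync_fence_info info;

	kbase_sync_fence_info_get(fence, &info);
	dev_dbg(kbdev->dev, "fence %s: %s\n", info.name,
		kbase_sync_status_string(info.status));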
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
index 5239dae..03c0df5 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_common.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -41,9 +41,9 @@
 const char *kbase_sync_status_string(int status)
 {
 	if (status == 0)
-		return "signaled";
-	else if (status > 0)
 		return "active";
+	else if (status > 0)
+		return "signaled";
 	else
 		return "error";
 }
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
index cda1f1b..0679c48 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_file.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2012-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -85,7 +85,9 @@
 	/* create a sync_file fd representing the fence */
 	sync_file = sync_file_create(fence);
 	if (!sync_file) {
+#if (KERNEL_VERSION(4, 9, 67) >= LINUX_VERSION_CODE)
 		dma_fence_put(fence);
+#endif
 		kbase_fence_out_remove(katom);
 		return -ENOMEM;
 	}
@@ -272,12 +274,12 @@
 	kbase_fence_in_remove(katom);
 }
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0))
-static void kbase_sync_fence_info_get(struct fence *fence,
-				      struct kbase_sync_fence_info *info)
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+void kbase_sync_fence_info_get(struct fence *fence,
+			       struct kbase_sync_fence_info *info)
 #else
-static void kbase_sync_fence_info_get(struct dma_fence *fence,
-				      struct kbase_sync_fence_info *info)
+void kbase_sync_fence_info_get(struct dma_fence *fence,
+			       struct kbase_sync_fence_info *info)
 #endif
 {
 	info->fence = fence;
@@ -303,9 +305,12 @@
 		info->status = 0; /* still active (unsignaled) */
 	}
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0))
+#if (KERNEL_VERSION(4, 8, 0) > LINUX_VERSION_CODE)
 	scnprintf(info->name, sizeof(info->name), "%u#%u",
 		  fence->context, fence->seqno);
+#elif (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+	scnprintf(info->name, sizeof(info->name), "%llu#%u",
+		  fence->context, fence->seqno);
 #else
 	scnprintf(info->name, sizeof(info->name), "%llu#%llu",
 		  fence->context, fence->seqno);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_timeline.c
new file mode 100644
index 0000000..17470fc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_timeline.c
@@ -0,0 +1,342 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_timeline.h"
+#include "mali_kbase_timeline_priv.h"
+#include "mali_kbase_tracepoints.h"
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/*****************************************************************************/
+
+/* These values are used in mali_kbase_tracepoints.h
+ * to retrieve the streams from a kbase_timeline instance.
+ */
+const size_t __obj_stream_offset =
+	offsetof(struct kbase_timeline, streams)
+	+ sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_OBJ;
+
+const size_t __aux_stream_offset =
+	offsetof(struct kbase_timeline, streams)
+	+ sizeof(struct kbase_tlstream) * TL_STREAM_TYPE_AUX;
+
+/**
+ * kbasep_timeline_autoflush_timer_callback - autoflush timer callback
+ * @timer:  Timer list
+ *
+ * The timer is executed periodically to check if any of the streams contains
+ * a buffer ready to be submitted to user space.
+ */
+static void kbasep_timeline_autoflush_timer_callback(struct timer_list *timer)
+{
+	enum tl_stream_type stype;
+	int                 rcode;
+	struct kbase_timeline *timeline =
+		container_of(timer, struct kbase_timeline, autoflush_timer);
+
+	CSTD_UNUSED(timer);
+
+	for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT;
+			stype++) {
+		struct kbase_tlstream *stream = &timeline->streams[stype];
+
+		int af_cnt = atomic_read(&stream->autoflush_counter);
+
+		/* Check if the stream contains unflushed data. */
+		if (af_cnt < 0)
+			continue;
+
+		/* Check if stream should be flushed now. */
+		if (af_cnt != atomic_cmpxchg(
+					&stream->autoflush_counter,
+					af_cnt,
+					af_cnt + 1))
+			continue;
+		if (!af_cnt)
+			continue;
+
+		/* Autoflush this stream. */
+		kbase_tlstream_flush_stream(stream);
+	}
+
+	if (atomic_read(&timeline->autoflush_timer_active))
+		rcode = mod_timer(
+				&timeline->autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+	CSTD_UNUSED(rcode);
+}
+
+
+
+/*****************************************************************************/
+
+int kbase_timeline_init(struct kbase_timeline **timeline,
+		atomic_t *timeline_is_enabled)
+{
+	enum tl_stream_type i;
+	struct kbase_timeline *result;
+
+	if (!timeline || !timeline_is_enabled)
+		return -EINVAL;
+
+	result = kzalloc(sizeof(*result), GFP_KERNEL);
+	if (!result)
+		return -ENOMEM;
+
+	mutex_init(&result->reader_lock);
+	init_waitqueue_head(&result->event_queue);
+
+	/* Prepare stream structures. */
+	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++)
+		kbase_tlstream_init(&result->streams[i], i,
+			&result->event_queue);
+
+	/* Initialize autoflush timer. */
+	atomic_set(&result->autoflush_timer_active, 0);
+	kbase_timer_setup(&result->autoflush_timer,
+			  kbasep_timeline_autoflush_timer_callback);
+	result->is_enabled = timeline_is_enabled;
+
+	*timeline = result;
+	return 0;
+}
+
+void kbase_timeline_term(struct kbase_timeline *timeline)
+{
+	enum tl_stream_type i;
+
+	if (!timeline)
+		return;
+
+	for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; i++)
+		kbase_tlstream_term(&timeline->streams[i]);
+
+	kfree(timeline);
+}
+
+static void kbase_create_timeline_objects(struct kbase_device *kbdev)
+{
+	unsigned int lpu_id;
+	unsigned int as_nr;
+	struct kbase_context *kctx;
+	struct kbase_timeline *timeline = kbdev->timeline;
+	struct kbase_tlstream *summary =
+		&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY];
+
+	/* Summarize the LPU objects. */
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		u32 *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		__kbase_tlstream_tl_new_lpu(summary, lpu, lpu_id, *lpu);
+	}
+
+	/* Summarize the Address Space objects. */
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		__kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr);
+
+	/* Create GPU object and make it retain all LPUs and address spaces. */
+	__kbase_tlstream_tl_new_gpu(summary,
+			kbdev,
+			kbdev->gpu_props.props.raw_props.gpu_id,
+			kbdev->gpu_props.num_cores);
+
+	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+		void *lpu =
+			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+		__kbase_tlstream_tl_lifelink_lpu_gpu(summary, lpu, kbdev);
+	}
+
+	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
+		__kbase_tlstream_tl_lifelink_as_gpu(summary,
+				&kbdev->as[as_nr],
+				kbdev);
+
+	/* Lock the context list, to ensure no changes to the list are made
+	 * while we're summarizing the contexts and their contents.
+	 */
+	mutex_lock(&kbdev->kctx_list_lock);
+
+	/* For each context in the device... */
+	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+		/* Summarize the context itself */
+		__kbase_tlstream_tl_new_ctx(summary,
+				kctx,
+				kctx->id,
+				(u32)(kctx->tgid));
+	};
+
+	/* Reset body stream buffers while holding the kctx lock.
+	 * This ensures we can't fire both summary and normal tracepoints for
+	 * the same objects.
+	 * If we weren't holding the lock, it's possible that the summarized
+	 * objects could have been created, destroyed, or used after we
+	 * constructed the summary stream tracepoints, but before we reset
+	 * the body stream, resulting in losing those object event tracepoints.
+	 */
+	kbase_timeline_streams_body_reset(timeline);
+
+	mutex_unlock(&kbdev->kctx_list_lock);
+
+	/* Static objects are placed into the summary packet, which needs to be
+	 * transmitted first. Flush all streams to make it available to
+	 * user space.
+	 */
+	kbase_timeline_streams_flush(timeline);
+}
+
+#ifdef CONFIG_MALI_DEVFREQ
+static void kbase_tlstream_current_devfreq_target(struct kbase_device *kbdev)
+{
+	struct devfreq *devfreq = kbdev->devfreq;
+
+	/* Devfreq initialization failure isn't a fatal error, so devfreq might
+	 * be null.
+	 */
+	if (devfreq) {
+		unsigned long cur_freq = 0;
+
+		mutex_lock(&devfreq->lock);
+#if KERNEL_VERSION(4, 3, 0) > LINUX_VERSION_CODE
+		cur_freq = kbdev->current_nominal_freq;
+#else
+		cur_freq = devfreq->last_status.current_frequency;
+#endif
+		KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, (u64)cur_freq);
+		mutex_unlock(&devfreq->lock);
+	}
+}
+#endif /* CONFIG_MALI_DEVFREQ */
+
+int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
+{
+	int ret;
+	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
+	struct kbase_timeline *timeline = kbdev->timeline;
+
+	if (!atomic_cmpxchg(timeline->is_enabled, 0, tlstream_enabled)) {
+		int rcode;
+
+		ret = anon_inode_getfd(
+				"[mali_tlstream]",
+				&kbasep_tlstream_fops,
+				timeline,
+				O_RDONLY | O_CLOEXEC);
+		if (ret < 0) {
+			atomic_set(timeline->is_enabled, 0);
+			return ret;
+		}
+
+		/* Reset and initialize header streams. */
+		kbase_tlstream_reset(
+			&timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY]);
+
+		timeline->obj_header_btc = obj_desc_header_size;
+		timeline->aux_header_btc = aux_desc_header_size;
+
+		/* Start autoflush timer. */
+		atomic_set(&timeline->autoflush_timer_active, 1);
+		rcode = mod_timer(
+				&timeline->autoflush_timer,
+				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+		CSTD_UNUSED(rcode);
+
+		/* If job dumping is enabled, readjust the software event's
+		 * timeout as the default value of 3 seconds is often
+		 * insufficient.
+		 */
+		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
+			dev_info(kbdev->dev,
+					"Job dumping is enabled, readjusting the software event's timeout\n");
+			atomic_set(&kbdev->js_data.soft_job_timeout_ms,
+					1800000);
+		}
+
+		/* Summary stream was cleared during acquire.
+		 * Create static timeline objects that will be
+		 * read by client.
+		 */
+		kbase_create_timeline_objects(kbdev);
+
+#ifdef CONFIG_MALI_DEVFREQ
+		/* Devfreq target tracepoints are only fired when the target
+		 * changes, so we won't know the current target unless we
+		 * send it now.
+		 */
+		kbase_tlstream_current_devfreq_target(kbdev);
+#endif /* CONFIG_MALI_DEVFREQ */
+
+	} else {
+		ret = -EBUSY;
+	}
+
+	return ret;
+}
+
+void kbase_timeline_streams_flush(struct kbase_timeline *timeline)
+{
+	enum tl_stream_type stype;
+
+	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+		kbase_tlstream_flush_stream(&timeline->streams[stype]);
+}
+
+void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline)
+{
+	kbase_tlstream_reset(
+			&timeline->streams[TL_STREAM_TYPE_OBJ]);
+	kbase_tlstream_reset(
+			&timeline->streams[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_timeline_stats(struct kbase_timeline *timeline,
+		u32 *bytes_collected, u32 *bytes_generated)
+{
+	enum tl_stream_type stype;
+
+	KBASE_DEBUG_ASSERT(bytes_collected);
+
+	/* Accumulate bytes generated per stream  */
+	*bytes_generated = 0;
+	for (stype = (enum tl_stream_type)0; stype < TL_STREAM_TYPE_COUNT;
+			stype++)
+		*bytes_generated += atomic_read(
+			&timeline->streams[stype].bytes_generated);
+
+	*bytes_collected = atomic_read(&timeline->bytes_collected);
+}
+#endif /* MALI_UNIT_TEST */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_timeline.h
new file mode 100644
index 0000000..d800288
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_timeline.h
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TIMELINE_H)
+#define _KBASE_TIMELINE_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+struct kbase_timeline;
+
+/**
+ * kbase_timeline_init - initialize timeline infrastructure in kernel
+ * @timeline:            Newly created instance of kbase_timeline will
+ *                       be stored in this pointer.
+ * @timeline_is_enabled: Timeline status will be written to this variable
+ *                       when a client is attached/detached. The variable
+ *                       must be valid while the timeline instance is valid.
+ * Return: zero on success, negative number on error
+ */
+int kbase_timeline_init(struct kbase_timeline **timeline,
+	atomic_t *timeline_is_enabled);
+
+/**
+ * kbase_timeline_term - terminate timeline infrastructure in kernel
+ *
+ * @timeline:     Timeline instance to be terminated. It must have been
+ *                previously created with kbase_timeline_init().
+ */
+void kbase_timeline_term(struct kbase_timeline *timeline);
+
+/**
+ * kbase_timeline_io_acquire - acquire timeline stream file descriptor
+ * @kbdev:     Kbase device
+ * @flags:     Timeline stream flags
+ *
+ * The descriptor is meant to be used by a userspace timeline tool to gain
+ * access to the kernel timeline stream, which user space then broadcasts to
+ * the timeline client.
+ * Only one entity can own the descriptor at any given time. The descriptor
+ * should be closed if unused. If the descriptor cannot be obtained (i.e. it
+ * is already in use), a negative value is returned.
+ *
+ * Return: file descriptor on success, negative number on error
+ */
+int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags);
+
+/**
+ * kbase_timeline_streams_flush - flush timeline streams.
+ * @timeline:     Timeline instance
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_timeline_streams_flush(struct kbase_timeline *timeline);
+
+/**
+ * kbase_timeline_streams_body_reset - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ * @timeline:     Timeline instance
+ */
+void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_timeline_test - start timeline stream data generator
+ * @kbdev:     Kernel common context
+ * @tpw_count: Number of trace point writers in each context
+ * @msg_delay: Time delay in milliseconds between trace points written by one
+ *             writer
+ * @msg_count: Number of trace points written by one writer
+ * @aux_msg:   If non-zero aux messages will be included
+ *
+ * This test starts the requested number of asynchronous writers in both IRQ
+ * and thread context. Each writer generates the required number of test
+ * tracepoints (tracepoints with embedded information about the writer, which
+ * should be verified by the user space reader). Tracepoints are emitted in
+ * all timeline body streams. If aux_msg is non-zero, each writer also
+ * generates tracepoints that cannot be verified (tracepoints without
+ * information about the writer). These tracepoints are used to check the
+ * correctness of the remaining timeline message generating functions. A
+ * writer waits the requested time between generating each set of messages.
+ * This call blocks until all writers finish.
+ */
+void kbase_timeline_test(
+	struct kbase_device *kbdev,
+	unsigned int tpw_count,
+	unsigned int msg_delay,
+	unsigned int msg_count,
+	int          aux_msg);
+
+/**
+ * kbase_timeline_stats - read timeline stream statistics
+ * @timeline:        Timeline instance
+ * @bytes_collected: Will hold number of bytes read by the user
+ * @bytes_generated: Will hold number of bytes generated by trace points
+ */
+void kbase_timeline_stats(struct kbase_timeline *timeline, u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _KBASE_TIMELINE_H */
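The header above replaces the old per-device tlstream entry points: device code creates one kbase_timeline per kbase_device and hands the stream out through kbase_timeline_io_acquire(). A minimal kernel-side sketch of that wiring; the kbdev field names (timeline, timeline_is_enabled) are assumptions based on how mali_kbase_timeline.c dereferences kbdev->timeline, not a definitive part of this patch.

	/* Hedged sketch of the expected lifetime. */
	static int example_timeline_bringup(struct kbase_device *kbdev)
	{
		return kbase_timeline_init(&kbdev->timeline,
				&kbdev->timeline_is_enabled);
	}

	static int example_timeline_acquire(struct kbase_device *kbdev, u32 flags)
	{
		/* Returns a file descriptor that userspace read()s timeline
		 * packets from, or -EBUSY if a client already owns the stream.
		 */
		return kbase_timeline_io_acquire(kbdev, flags);
	}

	static void example_timeline_teardown(struct kbase_device *kbdev)
	{
		kbase_timeline_term(kbdev->timeline);
	}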
diff --git a/drivers/gpu/arm/midgard/mali_kbase_timeline_io.c b/drivers/gpu/arm/midgard/mali_kbase_timeline_io.c
new file mode 100644
index 0000000..ffcf84a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_timeline_io.c
@@ -0,0 +1,314 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include <mali_kbase_timeline_priv.h>
+#include <mali_kbase_tlstream.h>
+#include <mali_kbase_tracepoints.h>
+
+#include <linux/poll.h>
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_timeline_io_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos);
+static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait);
+static int kbasep_timeline_io_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+const struct file_operations kbasep_tlstream_fops = {
+	.owner = THIS_MODULE,
+	.release = kbasep_timeline_io_release,
+	.read    = kbasep_timeline_io_read,
+	.poll    = kbasep_timeline_io_poll,
+};
+
+/**
+ * kbasep_timeline_io_packet_pending - check timeline streams for pending packets
+ * @timeline:      Timeline instance
+ * @ready_stream:  Pointer to variable where stream will be placed
+ * @rb_idx_raw:    Pointer to variable where read buffer index will be placed
+ *
+ * The function checks all streams for pending packets. It stops as soon as a
+ * packet ready to be submitted to user space is detected. The variables passed
+ * by pointer to this function are updated to point to the ready stream and its
+ * read buffer index.
+ *
+ * Return: non-zero if any of the timeline streams has at least one packet ready
+ */
+static int kbasep_timeline_io_packet_pending(
+		struct kbase_timeline  *timeline,
+		struct kbase_tlstream **ready_stream,
+		unsigned int           *rb_idx_raw)
+{
+	enum tl_stream_type i;
+
+	KBASE_DEBUG_ASSERT(ready_stream);
+	KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+	for (i = (enum tl_stream_type)0; i < TL_STREAM_TYPE_COUNT; ++i) {
+		struct kbase_tlstream *stream = &timeline->streams[i];
+		*rb_idx_raw = atomic_read(&stream->rbi);
+		/* Read buffer index may be updated by writer in case of
+		 * overflow. Read and write buffer indexes must be
+		 * loaded in correct order.
+		 */
+		smp_rmb();
+		if (atomic_read(&stream->wbi) != *rb_idx_raw) {
+			*ready_stream = stream;
+			return 1;
+		}
+
+	}
+
+	return 0;
+}
+
+/**
+ * kbasep_timeline_copy_header - copy timeline headers to the user
+ * @timeline:    Timeline instance
+ * @buffer:      Pointer to the buffer provided by user
+ * @size:        Maximum amount of data that can be stored in the buffer
+ * @copy_len:    Pointer to the number of bytes that have already been copied
+ *               within the read system call.
+ *
+ * This helper function checks whether the timeline headers have already
+ * been sent to the user and, if not, sends them. @copy_len is updated
+ * accordingly.
+ *
+ * Return: 0 on success, -1 if copy_to_user() has failed.
+ */
+static inline int kbasep_timeline_copy_header(
+	struct kbase_timeline *timeline,
+	char __user *buffer,
+	size_t size,
+	ssize_t *copy_len)
+{
+	if (timeline->obj_header_btc) {
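+		/* The *_header_btc fields count down from the full header
+		 * size, so the offset below is the number of header bytes
+		 * that have already been copied to the user.
+		 */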
+		size_t offset = obj_desc_header_size -
+			timeline->obj_header_btc;
+
+		size_t header_cp_size = MIN(
+			size - *copy_len,
+			timeline->obj_header_btc);
+
+		if (copy_to_user(
+			    &buffer[*copy_len],
+			    &obj_desc_header[offset],
+			    header_cp_size))
+			return -1;
+
+		timeline->obj_header_btc -= header_cp_size;
+		*copy_len += header_cp_size;
+	}
+
+	if (timeline->aux_header_btc) {
+		size_t offset = aux_desc_header_size -
+			timeline->aux_header_btc;
+		size_t header_cp_size = MIN(
+			size - *copy_len,
+			timeline->aux_header_btc);
+
+		if (copy_to_user(
+			    &buffer[*copy_len],
+			    &aux_desc_header[offset],
+			    header_cp_size))
+			return -1;
+
+		timeline->aux_header_btc -= header_cp_size;
+		*copy_len += header_cp_size;
+	}
+	return 0;
+}
+
+/**
+ * kbasep_timeline_io_read - copy data from streams to buffer provided by user
+ * @filp:   Pointer to file structure
+ * @buffer: Pointer to the buffer provided by user
+ * @size:   Maximum amount of data that can be stored in the buffer
+ * @f_pos:  Pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_timeline_io_read(
+		struct file *filp,
+		char __user *buffer,
+		size_t      size,
+		loff_t      *f_pos)
+{
+	ssize_t copy_len = 0;
+	struct kbase_timeline *timeline;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(f_pos);
+
+	if (WARN_ON(!filp->private_data))
+		return -EFAULT;
+
+	timeline = (struct kbase_timeline *) filp->private_data;
+
+	if (!buffer)
+		return -EINVAL;
+
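+	/* The user buffer must be able to hold at least one whole packet. */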
+	if ((*f_pos < 0) || (size < PACKET_SIZE))
+		return -EINVAL;
+
+	mutex_lock(&timeline->reader_lock);
+
+	while (copy_len < size) {
+		struct kbase_tlstream *stream = NULL;
+		unsigned int        rb_idx_raw = 0;
+		unsigned int        wb_idx_raw;
+		unsigned int        rb_idx;
+		size_t              rb_size;
+
+		if (kbasep_timeline_copy_header(
+			    timeline, buffer, size, &copy_len)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If we have already read some packets and no packet is
+		 * pending, return to the user. If we don't have any data
+		 * yet, wait for a packet to be submitted.
+		 */
+		if (copy_len > 0) {
+			if (!kbasep_timeline_io_packet_pending(
+						timeline,
+						&stream,
+						&rb_idx_raw))
+				break;
+		} else {
+			if (wait_event_interruptible(
+						timeline->event_queue,
+						kbasep_timeline_io_packet_pending(
+							timeline,
+							&stream,
+							&rb_idx_raw))) {
+				copy_len = -ERESTARTSYS;
+				break;
+			}
+		}
+
+		if (WARN_ON(!stream)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* Check if this packet fits into the user buffer.
+		 * If so copy its content.
+		 */
+		rb_idx = rb_idx_raw % PACKET_COUNT;
+		rb_size = atomic_read(&stream->buffer[rb_idx].size);
+		if (rb_size > size - copy_len)
+			break;
+		if (copy_to_user(
+					&buffer[copy_len],
+					stream->buffer[rb_idx].data,
+					rb_size)) {
+			copy_len = -EFAULT;
+			break;
+		}
+
+		/* If the distance between the read and write buffer indexes
+		 * reached PACKET_COUNT or more, an overflow happened and we
+		 * need to discard the bytes that we have just sent to the
+		 * user.
+		 */
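+		/* Worked example with illustrative numbers: with
+		 * PACKET_COUNT == 16, if rb_idx_raw == 3 but the writer has
+		 * meanwhile advanced wbi to 20, then 20 - 3 >= 16, so the
+		 * packet we copied may have been overwritten. copy_len is
+		 * therefore not advanced, and rbi is moved to
+		 * 20 - 16 + 1 == 5, the oldest packet that is still valid.
+		 */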
+		smp_rmb();
+		wb_idx_raw = atomic_read(&stream->wbi);
+
+		if (wb_idx_raw - rb_idx_raw < PACKET_COUNT) {
+			copy_len += rb_size;
+			atomic_inc(&stream->rbi);
+#if MALI_UNIT_TEST
+			atomic_add(rb_size, &timeline->bytes_collected);
+#endif /* MALI_UNIT_TEST */
+
+		} else {
+			const unsigned int new_rb_idx_raw =
+				wb_idx_raw - PACKET_COUNT + 1;
+			/* Adjust read buffer index to the next valid buffer */
+			atomic_set(&stream->rbi, new_rb_idx_raw);
+		}
+	}
+
+	mutex_unlock(&timeline->reader_lock);
+
+	return copy_len;
+}
+
+/**
+ * kbasep_timeline_io_poll - poll timeline stream for packets
+ * @filp: Pointer to file structure
+ * @wait: Pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
+{
+	struct kbase_tlstream *stream;
+	unsigned int        rb_idx;
+	struct kbase_timeline *timeline;
+
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(wait);
+
+	if (WARN_ON(!filp->private_data))
+		return -EFAULT;
+
+	timeline = (struct kbase_timeline *) filp->private_data;
+
+	poll_wait(filp, &timeline->event_queue, wait);
+	if (kbasep_timeline_io_packet_pending(timeline, &stream, &rb_idx))
+		return POLLIN;
+	return 0;
+}
+
+/**
+ * kbasep_timeline_io_release - release timeline stream descriptor
+ * @inode: Pointer to inode structure
+ * @filp:  Pointer to file structure
+ *
+ * Return: always returns zero
+ */
+static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
+{
+	struct kbase_timeline *timeline;
+
+	KBASE_DEBUG_ASSERT(inode);
+	KBASE_DEBUG_ASSERT(filp);
+	KBASE_DEBUG_ASSERT(filp->private_data);
+
+	CSTD_UNUSED(inode);
+
+	timeline = (struct kbase_timeline *) filp->private_data;
+
+	/* Stop autoflush timer before releasing access to streams. */
+	atomic_set(&timeline->autoflush_timer_active, 0);
+	del_timer_sync(&timeline->autoflush_timer);
+
+	atomic_set(timeline->is_enabled, 0);
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_timeline_priv.h b/drivers/gpu/arm/midgard/mali_kbase_timeline_priv.h
new file mode 100644
index 0000000..e4a4a20
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_timeline_priv.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TIMELINE_PRIV_H)
+#define _KBASE_TIMELINE_PRIV_H
+
+#include <mali_kbase.h>
+#include <mali_kbase_tlstream.h>
+
+#include <linux/timer.h>
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+
+/**
+ * struct kbase_timeline - timeline state structure
+ * @streams:                The timeline streams generated by kernel
+ * @autoflush_timer:        Autoflush timer
+ * @autoflush_timer_active: If non-zero autoflush timer is active
+ * @reader_lock:            Reader lock. Only one reader is allowed to
+ *                          have access to the timeline streams at any given time.
+ * @event_queue:            Timeline stream event queue
+ * @bytes_collected:        Number of bytes read by user
+ * @is_enabled:             Zero, if timeline is disabled. Timeline stream flags
+ *                          otherwise. See kbase_timeline_io_acquire().
+ * @obj_header_btc:         Remaining bytes to copy for the object stream header
+ * @aux_header_btc:         Remaining bytes to copy for the aux stream header
+ */
+struct kbase_timeline {
+	struct kbase_tlstream streams[TL_STREAM_TYPE_COUNT];
+	struct timer_list autoflush_timer;
+	atomic_t          autoflush_timer_active;
+	struct mutex      reader_lock;
+	wait_queue_head_t event_queue;
+#if MALI_UNIT_TEST
+	atomic_t          bytes_collected;
+#endif /* MALI_UNIT_TEST */
+	atomic_t         *is_enabled;
+	size_t            obj_header_btc;
+	size_t            aux_header_btc;
+};
+
+extern const struct file_operations kbasep_tlstream_fops;
+
+#endif /* _KBASE_TIMELINE_PRIV_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tl_serialize.h b/drivers/gpu/arm/midgard/mali_kbase_tl_serialize.h
new file mode 100644
index 0000000..90808ce
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tl_serialize.h
@@ -0,0 +1,127 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#if !defined(_KBASE_TL_SERIALIZE_H)
+#define _KBASE_TL_SERIALIZE_H
+
+#include <mali_kbase.h>
+
+#include <linux/timer.h>
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC       1000000000ull /* ns */
+
+/**
+ * kbasep_serialize_bytes - serialize bytes to the message buffer
+ *
+ * Serialize bytes as is using memcpy()
+ *
+ * @buffer:    Message buffer
+ * @pos:       Message buffer offset
+ * @bytes:     Bytes to serialize
+ * @len:       Length of bytes array
+ *
+ * Return: updated position in the buffer
+ */
+static inline size_t kbasep_serialize_bytes(
+		char       *buffer,
+		size_t     pos,
+		const void *bytes,
+		size_t     len)
+{
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(bytes);
+
+	memcpy(&buffer[pos], bytes, len);
+
+	return pos + len;
+}
+
+/**
+ * kbasep_serialize_string - serialize string to the message buffer
+ *
+ * String is serialized as 4 bytes for string size,
+ * then string content and then null terminator.
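+ * For example, "gpu" is stored as the u32 value 4 (length including the
+ * nul terminator) followed by the bytes 'g', 'p', 'u', '\0'.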
+ *
+ * @buffer:         Message buffer
+ * @pos:            Message buffer offset
+ * @string:         String to serialize
+ * @max_write_size: Number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static inline size_t kbasep_serialize_string(
+		char       *buffer,
+		size_t     pos,
+		const char *string,
+		size_t     max_write_size)
+{
+	u32 string_len;
+
+	KBASE_DEBUG_ASSERT(buffer);
+	KBASE_DEBUG_ASSERT(string);
+	/* Timeline string consists of at least string length and nul
+	 * terminator.
+	 */
+	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+	max_write_size -= sizeof(string_len);
+
+	string_len = strlcpy(
+			&buffer[pos + sizeof(string_len)],
+			string,
+			max_write_size);
+	string_len += sizeof(char);
+
+	/* Make sure that the source string fits into the buffer. */
+	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+	/* Update string length. */
+	memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+	return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_serialize_timestamp - serialize timestamp to the message buffer
+ *
+ * Get the current timestamp from the raw monotonic clock
+ * and serialize it as a 64 bit unsigned integer.
+ *
+ * @buffer: Message buffer
+ * @pos:    Message buffer offset
+ *
+ * Return: updated position in the buffer
+ */
+static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos)
+{
+	struct timespec ts;
+	u64             timestamp;
+
+	getrawmonotonic(&ts);
+	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+
+	return kbasep_serialize_bytes(
+			buffer, pos,
+			&timestamp, sizeof(timestamp));
+}
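+
+/*
+ * Usage sketch (illustrative only): a timeline message body is typically
+ * built by chaining these helpers, message id first, then the timestamp,
+ * then the arguments:
+ *
+ *   size_t pos = 0;
+ *
+ *   pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ *   pos = kbasep_serialize_timestamp(buffer, pos);
+ *   pos = kbasep_serialize_bytes(buffer, pos, &arg, sizeof(arg));
+ */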
+#endif /* _KBASE_TL_SERIALIZE_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
index 2ff45f5..2a76bc0 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,705 +20,12 @@
  *
  */
 
-#include <linux/anon_inodes.h>
-#include <linux/atomic.h>
-#include <linux/file.h>
-#include <linux/mutex.h>
-#include <linux/poll.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/stringify.h>
-#include <linux/timer.h>
-#include <linux/wait.h>
-
-#include <mali_kbase.h>
-#include <mali_kbase_jm.h>
-#include <mali_kbase_tlstream.h>
-
-/*****************************************************************************/
-
-/* The version of swtrace protocol used in timeline stream. */
-#define SWTRACE_VERSION    3
-
-/* The maximum expected length of string in tracepoint descriptor. */
-#define STRLEN_MAX         64 /* bytes */
-
-/* The number of nanoseconds in a second. */
-#define NSECS_IN_SEC       1000000000ull /* ns */
-
-/* The period of autoflush checker execution in milliseconds. */
-#define AUTOFLUSH_INTERVAL 1000 /* ms */
-
-/* The maximum size of a single packet used by timeline. */
-#define PACKET_SIZE        4096 /* bytes */
-
-/* The number of packets used by one timeline stream. */
-#define PACKET_COUNT       16
-
-/* The number of bytes reserved for packet header.
- * These value must be defined according to MIPE documentation. */
-#define PACKET_HEADER_SIZE 8 /* bytes */
-
-/* The number of bytes reserved for packet sequence number.
- * These value must be defined according to MIPE documentation. */
-#define PACKET_NUMBER_SIZE 4 /* bytes */
-
-/* Packet header - first word.
- * These values must be defined according to MIPE documentation. */
-#define PACKET_STREAMID_POS  0
-#define PACKET_STREAMID_LEN  8
-#define PACKET_RSVD1_POS     (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
-#define PACKET_RSVD1_LEN     8
-#define PACKET_TYPE_POS      (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
-#define PACKET_TYPE_LEN      3
-#define PACKET_CLASS_POS     (PACKET_TYPE_POS + PACKET_TYPE_LEN)
-#define PACKET_CLASS_LEN     7
-#define PACKET_FAMILY_POS    (PACKET_CLASS_POS + PACKET_CLASS_LEN)
-#define PACKET_FAMILY_LEN    6
-
-/* Packet header - second word
- * These values must be defined according to MIPE documentation. */
-#define PACKET_LENGTH_POS    0
-#define PACKET_LENGTH_LEN    24
-#define PACKET_SEQBIT_POS    (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
-#define PACKET_SEQBIT_LEN    1
-#define PACKET_RSVD2_POS     (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
-#define PACKET_RSVD2_LEN     7
-
-/* Types of streams generated by timeline.
- * Order is significant! Header streams must precede respective body streams. */
-enum tl_stream_type {
-	TL_STREAM_TYPE_OBJ_HEADER,
-	TL_STREAM_TYPE_OBJ_SUMMARY,
-	TL_STREAM_TYPE_OBJ,
-	TL_STREAM_TYPE_AUX_HEADER,
-	TL_STREAM_TYPE_AUX,
-
-	TL_STREAM_TYPE_COUNT
-};
-
-/* Timeline packet family ids.
- * Values are significant! Check MIPE documentation. */
-enum tl_packet_family {
-	TL_PACKET_FAMILY_CTRL = 0, /* control packets */
-	TL_PACKET_FAMILY_TL   = 1, /* timeline packets */
-
-	TL_PACKET_FAMILY_COUNT
-};
-
-/* Packet classes used in timeline streams.
- * Values are significant! Check MIPE documentation. */
-enum tl_packet_class {
-	TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
-	TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
-};
-
-/* Packet types used in timeline streams.
- * Values are significant! Check MIPE documentation. */
-enum tl_packet_type {
-	TL_PACKET_TYPE_HEADER  = 0, /* stream's header/directory */
-	TL_PACKET_TYPE_BODY    = 1, /* stream's body */
-	TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
-};
-
-/* Message ids of trace events that are recorded in the timeline stream. */
-enum tl_msg_id_obj {
-	/* Timeline object events. */
-	KBASE_TL_NEW_CTX,
-	KBASE_TL_NEW_GPU,
-	KBASE_TL_NEW_LPU,
-	KBASE_TL_NEW_ATOM,
-	KBASE_TL_NEW_AS,
-	KBASE_TL_DEL_CTX,
-	KBASE_TL_DEL_ATOM,
-	KBASE_TL_LIFELINK_LPU_GPU,
-	KBASE_TL_LIFELINK_AS_GPU,
-	KBASE_TL_RET_CTX_LPU,
-	KBASE_TL_RET_ATOM_CTX,
-	KBASE_TL_RET_ATOM_LPU,
-	KBASE_TL_NRET_CTX_LPU,
-	KBASE_TL_NRET_ATOM_CTX,
-	KBASE_TL_NRET_ATOM_LPU,
-	KBASE_TL_RET_AS_CTX,
-	KBASE_TL_NRET_AS_CTX,
-	KBASE_TL_RET_ATOM_AS,
-	KBASE_TL_NRET_ATOM_AS,
-	KBASE_TL_DEP_ATOM_ATOM,
-	KBASE_TL_NDEP_ATOM_ATOM,
-	KBASE_TL_RDEP_ATOM_ATOM,
-	KBASE_TL_ATTRIB_ATOM_CONFIG,
-	KBASE_TL_ATTRIB_ATOM_PRIORITY,
-	KBASE_TL_ATTRIB_ATOM_STATE,
-	KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
-	KBASE_TL_ATTRIB_ATOM_JIT,
-	KBASE_TL_ATTRIB_AS_CONFIG,
-	KBASE_TL_EVENT_LPU_SOFTSTOP,
-	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
-	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
-	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
-	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
-
-	/* Job dump specific events. */
-	KBASE_JD_GPU_SOFT_RESET
-};
-
-/* Message ids of trace events that are recorded in the auxiliary stream. */
-enum tl_msg_id_aux {
-	KBASE_AUX_PM_STATE,
-	KBASE_AUX_PAGEFAULT,
-	KBASE_AUX_PAGESALLOC,
-	KBASE_AUX_DEVFREQ_TARGET,
-	KBASE_AUX_PROTECTED_ENTER_START,
-	KBASE_AUX_PROTECTED_ENTER_END,
-	KBASE_AUX_PROTECTED_LEAVE_START,
-	KBASE_AUX_PROTECTED_LEAVE_END
-};
-
-/*****************************************************************************/
+#include "mali_kbase_tlstream.h"
+#include "mali_kbase_tl_serialize.h"
+#include "mali_kbase_mipe_proto.h"
 
 /**
- * struct tl_stream - timeline stream structure
- * @lock: message order lock
- * @buffer: array of buffers
- * @wbi: write buffer index
- * @rbi: read buffer index
- * @numbered: if non-zero stream's packets are sequentially numbered
- * @autoflush_counter: counter tracking stream's autoflush state
- *
- * This structure holds information needed to construct proper packets in the
- * timeline stream. Each message in sequence must bear timestamp that is greater
- * to one in previous message in the same stream. For this reason lock is held
- * throughout the process of message creation. Each stream contains set of
- * buffers. Each buffer will hold one MIPE packet. In case there is no free
- * space required to store incoming message the oldest buffer is discarded.
- * Each packet in timeline body stream has sequence number embedded (this value
- * must increment monotonically and is used by packets receiver to discover
- * buffer overflows.
- * Autoflush counter is set to negative number when there is no data pending
- * for flush and it is set to zero on every update of the buffer. Autoflush
- * timer will increment the counter by one on every expiry. In case there will
- * be no activity on the buffer during two consecutive timer expiries, stream
- * buffer will be flushed.
- */
-struct tl_stream {
-	spinlock_t lock;
-
-	struct {
-		atomic_t size;              /* number of bytes in buffer */
-		char     data[PACKET_SIZE]; /* buffer's data */
-	} buffer[PACKET_COUNT];
-
-	atomic_t wbi;
-	atomic_t rbi;
-
-	int      numbered;
-	atomic_t autoflush_counter;
-};
-
-/**
- * struct tp_desc - tracepoint message descriptor structure
- * @id:        tracepoint ID identifying message in stream
- * @id_str:    human readable version of tracepoint ID
- * @name:      tracepoint description
- * @arg_types: tracepoint's arguments types declaration
- * @arg_names: comma separated list of tracepoint's arguments names
- */
-struct tp_desc {
-	u32        id;
-	const char *id_str;
-	const char *name;
-	const char *arg_types;
-	const char *arg_names;
-};
-
-/*****************************************************************************/
-
-/* Configuration of timeline streams generated by kernel.
- * Kernel emit only streams containing either timeline object events or
- * auxiliary events. All streams have stream id value of 1 (as opposed to user
- * space streams that have value of 0). */
-static const struct {
-	enum tl_packet_family pkt_family;
-	enum tl_packet_class  pkt_class;
-	enum tl_packet_type   pkt_type;
-	unsigned int          stream_id;
-} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
-	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER,  1},
-	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
-	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
-	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER,  1},
-	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
-};
-
-/* The timeline streams generated by kernel. */
-static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
-
-/* Autoflush timer. */
-static struct timer_list autoflush_timer;
-
-/* If non-zero autoflush timer is active. */
-static atomic_t autoflush_timer_active;
-
-/* Reader lock. Only one reader is allowed to have access to the timeline
- * streams at any given time. */
-static DEFINE_MUTEX(tl_reader_lock);
-
-/* Timeline stream event queue. */
-static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
-
-/* The timeline stream file operations functions. */
-static ssize_t kbasep_tlstream_read(
-		struct file *filp,
-		char __user *buffer,
-		size_t      size,
-		loff_t      *f_pos);
-static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
-static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
-
-/* The timeline stream file operations structure. */
-static const struct file_operations kbasep_tlstream_fops = {
-	.release = kbasep_tlstream_release,
-	.read    = kbasep_tlstream_read,
-	.poll    = kbasep_tlstream_poll,
-};
-
-/* Descriptors of timeline messages transmitted in object events stream. */
-static const struct tp_desc tp_desc_obj[] = {
-	{
-		KBASE_TL_NEW_CTX,
-		__stringify(KBASE_TL_NEW_CTX),
-		"object ctx is created",
-		"@pII",
-		"ctx,ctx_nr,tgid"
-	},
-	{
-		KBASE_TL_NEW_GPU,
-		__stringify(KBASE_TL_NEW_GPU),
-		"object gpu is created",
-		"@pII",
-		"gpu,gpu_id,core_count"
-	},
-	{
-		KBASE_TL_NEW_LPU,
-		__stringify(KBASE_TL_NEW_LPU),
-		"object lpu is created",
-		"@pII",
-		"lpu,lpu_nr,lpu_fn"
-	},
-	{
-		KBASE_TL_NEW_ATOM,
-		__stringify(KBASE_TL_NEW_ATOM),
-		"object atom is created",
-		"@pI",
-		"atom,atom_nr"
-	},
-	{
-		KBASE_TL_NEW_AS,
-		__stringify(KBASE_TL_NEW_AS),
-		"address space object is created",
-		"@pI",
-		"address_space,as_nr"
-	},
-	{
-		KBASE_TL_DEL_CTX,
-		__stringify(KBASE_TL_DEL_CTX),
-		"context is destroyed",
-		"@p",
-		"ctx"
-	},
-	{
-		KBASE_TL_DEL_ATOM,
-		__stringify(KBASE_TL_DEL_ATOM),
-		"atom is destroyed",
-		"@p",
-		"atom"
-	},
-	{
-		KBASE_TL_LIFELINK_LPU_GPU,
-		__stringify(KBASE_TL_LIFELINK_LPU_GPU),
-		"lpu is deleted with gpu",
-		"@pp",
-		"lpu,gpu"
-	},
-	{
-		KBASE_TL_LIFELINK_AS_GPU,
-		__stringify(KBASE_TL_LIFELINK_AS_GPU),
-		"address space is deleted with gpu",
-		"@pp",
-		"address_space,gpu"
-	},
-	{
-		KBASE_TL_RET_CTX_LPU,
-		__stringify(KBASE_TL_RET_CTX_LPU),
-		"context is retained by lpu",
-		"@pp",
-		"ctx,lpu"
-	},
-	{
-		KBASE_TL_RET_ATOM_CTX,
-		__stringify(KBASE_TL_RET_ATOM_CTX),
-		"atom is retained by context",
-		"@pp",
-		"atom,ctx"
-	},
-	{
-		KBASE_TL_RET_ATOM_LPU,
-		__stringify(KBASE_TL_RET_ATOM_LPU),
-		"atom is retained by lpu",
-		"@pps",
-		"atom,lpu,attrib_match_list"
-	},
-	{
-		KBASE_TL_NRET_CTX_LPU,
-		__stringify(KBASE_TL_NRET_CTX_LPU),
-		"context is released by lpu",
-		"@pp",
-		"ctx,lpu"
-	},
-	{
-		KBASE_TL_NRET_ATOM_CTX,
-		__stringify(KBASE_TL_NRET_ATOM_CTX),
-		"atom is released by context",
-		"@pp",
-		"atom,ctx"
-	},
-	{
-		KBASE_TL_NRET_ATOM_LPU,
-		__stringify(KBASE_TL_NRET_ATOM_LPU),
-		"atom is released by lpu",
-		"@pp",
-		"atom,lpu"
-	},
-	{
-		KBASE_TL_RET_AS_CTX,
-		__stringify(KBASE_TL_RET_AS_CTX),
-		"address space is retained by context",
-		"@pp",
-		"address_space,ctx"
-	},
-	{
-		KBASE_TL_NRET_AS_CTX,
-		__stringify(KBASE_TL_NRET_AS_CTX),
-		"address space is released by context",
-		"@pp",
-		"address_space,ctx"
-	},
-	{
-		KBASE_TL_RET_ATOM_AS,
-		__stringify(KBASE_TL_RET_ATOM_AS),
-		"atom is retained by address space",
-		"@pp",
-		"atom,address_space"
-	},
-	{
-		KBASE_TL_NRET_ATOM_AS,
-		__stringify(KBASE_TL_NRET_ATOM_AS),
-		"atom is released by address space",
-		"@pp",
-		"atom,address_space"
-	},
-	{
-		KBASE_TL_DEP_ATOM_ATOM,
-		__stringify(KBASE_TL_DEP_ATOM_ATOM),
-		"atom2 depends on atom1",
-		"@pp",
-		"atom1,atom2"
-	},
-	{
-		KBASE_TL_NDEP_ATOM_ATOM,
-		__stringify(KBASE_TL_NDEP_ATOM_ATOM),
-		"atom2 no longer depends on atom1",
-		"@pp",
-		"atom1,atom2"
-	},
-	{
-		KBASE_TL_RDEP_ATOM_ATOM,
-		__stringify(KBASE_TL_RDEP_ATOM_ATOM),
-		"resolved dependecy of atom2 depending on atom1",
-		"@pp",
-		"atom1,atom2"
-	},
-	{
-		KBASE_TL_ATTRIB_ATOM_CONFIG,
-		__stringify(KBASE_TL_ATTRIB_ATOM_CONFIG),
-		"atom job slot attributes",
-		"@pLLI",
-		"atom,descriptor,affinity,config"
-	},
-	{
-		KBASE_TL_ATTRIB_ATOM_PRIORITY,
-		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITY),
-		"atom priority",
-		"@pI",
-		"atom,prio"
-	},
-	{
-		KBASE_TL_ATTRIB_ATOM_STATE,
-		__stringify(KBASE_TL_ATTRIB_ATOM_STATE),
-		"atom state",
-		"@pI",
-		"atom,state"
-	},
-	{
-		KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
-		__stringify(KBASE_TL_ATTRIB_ATOM_PRIORITIZED),
-		"atom caused priority change",
-		"@p",
-		"atom"
-	},
-	{
-		KBASE_TL_ATTRIB_ATOM_JIT,
-		__stringify(KBASE_TL_ATTRIB_ATOM_JIT),
-		"jit done for atom",
-		"@pLL",
-		"atom,edit_addr,new_addr"
-	},
-	{
-		KBASE_TL_ATTRIB_AS_CONFIG,
-		__stringify(KBASE_TL_ATTRIB_AS_CONFIG),
-		"address space attributes",
-		"@pLLL",
-		"address_space,transtab,memattr,transcfg"
-	},
-	{
-		KBASE_TL_EVENT_LPU_SOFTSTOP,
-		__stringify(KBASE_TL_EVENT_LPU_SOFTSTOP),
-		"softstop event on given lpu",
-		"@p",
-		"lpu"
-	},
-	{
-		KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
-		__stringify(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX),
-		"atom softstopped",
-		"@p",
-		"atom"
-	},
-	{
-		KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
-		__stringify(KBASE_TL_EVENT_SOFTSTOP_ISSUE),
-		"atom softstop issued",
-		"@p",
-		"atom"
-	},
-	{
-		KBASE_TL_EVENT_ATOM_SOFTJOB_START,
-		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_START),
-		"atom soft job has started",
-		"@p",
-		"atom"
-	},
-	{
-		KBASE_TL_EVENT_ATOM_SOFTJOB_END,
-		__stringify(KBASE_TL_EVENT_ATOM_SOFTJOB_END),
-		"atom soft job has completed",
-		"@p",
-		"atom"
-	},
-	{
-		KBASE_JD_GPU_SOFT_RESET,
-		__stringify(KBASE_JD_GPU_SOFT_RESET),
-		"gpu soft reset",
-		"@p",
-		"gpu"
-	},
-};
-
-/* Descriptors of timeline messages transmitted in auxiliary events stream. */
-static const struct tp_desc tp_desc_aux[] = {
-	{
-		KBASE_AUX_PM_STATE,
-		__stringify(KBASE_AUX_PM_STATE),
-		"PM state",
-		"@IL",
-		"core_type,core_state_bitset"
-	},
-	{
-		KBASE_AUX_PAGEFAULT,
-		__stringify(KBASE_AUX_PAGEFAULT),
-		"Page fault",
-		"@IL",
-		"ctx_nr,page_cnt_change"
-	},
-	{
-		KBASE_AUX_PAGESALLOC,
-		__stringify(KBASE_AUX_PAGESALLOC),
-		"Total alloc pages change",
-		"@IL",
-		"ctx_nr,page_cnt"
-	},
-	{
-		KBASE_AUX_DEVFREQ_TARGET,
-		__stringify(KBASE_AUX_DEVFREQ_TARGET),
-		"New device frequency target",
-		"@L",
-		"target_freq"
-	},
-	{
-		KBASE_AUX_PROTECTED_ENTER_START,
-		__stringify(KBASE_AUX_PROTECTED_ENTER_START),
-		"enter protected mode start",
-		"@p",
-		"gpu"
-	},
-	{
-		KBASE_AUX_PROTECTED_ENTER_END,
-		__stringify(KBASE_AUX_PROTECTED_ENTER_END),
-		"enter protected mode end",
-		"@p",
-		"gpu"
-	},
-	{
-		KBASE_AUX_PROTECTED_LEAVE_START,
-		__stringify(KBASE_AUX_PROTECTED_LEAVE_START),
-		"leave protected mode start",
-		"@p",
-		"gpu"
-	},
-	{
-		KBASE_AUX_PROTECTED_LEAVE_END,
-		__stringify(KBASE_AUX_PROTECTED_LEAVE_END),
-		"leave protected mode end",
-		"@p",
-		"gpu"
-	}
-};
-
-#if MALI_UNIT_TEST
-/* Number of bytes read by user. */
-static atomic_t tlstream_bytes_collected = {0};
-
-/* Number of bytes generated by tracepoint messages. */
-static atomic_t tlstream_bytes_generated = {0};
-#endif /* MALI_UNIT_TEST */
-
-/*****************************************************************************/
-
-/* Indicator of whether the timeline stream file descriptor is used. */
-atomic_t kbase_tlstream_enabled = {0};
-
-/*****************************************************************************/
-
-/**
- * kbasep_tlstream_get_timestamp - return timestamp
- *
- * Function returns timestamp value based on raw monotonic timer. Value will
- * wrap around zero in case of overflow.
- * Return: timestamp value
- */
-static u64 kbasep_tlstream_get_timestamp(void)
-{
-	struct timespec ts;
-	u64             timestamp;
-
-	getrawmonotonic(&ts);
-	timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
-	return timestamp;
-}
-
-/**
- * kbasep_tlstream_write_bytes - write data to message buffer
- * @buffer: buffer where data will be written
- * @pos:    position in the buffer where to place data
- * @bytes:  pointer to buffer holding data
- * @len:    length of data to be written
- *
- * Return: updated position in the buffer
- */
-static size_t kbasep_tlstream_write_bytes(
-		char       *buffer,
-		size_t     pos,
-		const void *bytes,
-		size_t     len)
-{
-	KBASE_DEBUG_ASSERT(buffer);
-	KBASE_DEBUG_ASSERT(bytes);
-
-	memcpy(&buffer[pos], bytes, len);
-
-	return pos + len;
-}
-
-/**
- * kbasep_tlstream_write_string - write string to message buffer
- * @buffer:         buffer where data will be written
- * @pos:            position in the buffer where to place data
- * @string:         pointer to buffer holding the source string
- * @max_write_size: number of bytes that can be stored in buffer
- *
- * Return: updated position in the buffer
- */
-static size_t kbasep_tlstream_write_string(
-		char       *buffer,
-		size_t     pos,
-		const char *string,
-		size_t     max_write_size)
-{
-	u32 string_len;
-
-	KBASE_DEBUG_ASSERT(buffer);
-	KBASE_DEBUG_ASSERT(string);
-	/* Timeline string consists of at least string length and nul
-	 * terminator. */
-	KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
-	max_write_size -= sizeof(string_len);
-
-	string_len = strlcpy(
-			&buffer[pos + sizeof(string_len)],
-			string,
-			max_write_size);
-	string_len += sizeof(char);
-
-	/* Make sure that the source string fit into the buffer. */
-	KBASE_DEBUG_ASSERT(string_len <= max_write_size);
-
-	/* Update string length. */
-	memcpy(&buffer[pos], &string_len, sizeof(string_len));
-
-	return pos + sizeof(string_len) + string_len;
-}
-
-/**
- * kbasep_tlstream_write_timestamp - write timestamp to message buffer
- * @buffer: buffer where data will be written
- * @pos:    position in the buffer where to place data
- *
- * Return: updated position in the buffer
- */
-static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
-{
-	u64 timestamp = kbasep_tlstream_get_timestamp();
-
-	return kbasep_tlstream_write_bytes(
-			buffer, pos,
-			&timestamp, sizeof(timestamp));
-}
-
-/**
- * kbasep_tlstream_put_bits - put bits in a word
- * @word:   pointer to the words being modified
- * @value:  value that shall be written to given position
- * @bitpos: position where value shall be written (in bits)
- * @bitlen: length of value (in bits)
- */
-static void kbasep_tlstream_put_bits(
-		u32          *word,
-		u32          value,
-		unsigned int bitpos,
-		unsigned int bitlen)
-{
-	const u32 mask = ((1 << bitlen) - 1) << bitpos;
-
-	KBASE_DEBUG_ASSERT(word);
-	KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
-	KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
-
-	*word &= ~mask;
-	*word |= ((value << bitpos) & mask);
-}
-
-/**
- * kbasep_tlstream_packet_header_setup - setup the packet header
+ * kbasep_packet_header_setup - setup the packet header
  * @buffer:     pointer to the buffer
  * @pkt_family: packet's family
  * @pkt_type:   packet's type
@@ -728,107 +35,69 @@
  *
  * Function sets up immutable part of packet header in the given buffer.
  */
-static void kbasep_tlstream_packet_header_setup(
-		char                  *buffer,
-		enum tl_packet_family pkt_family,
-		enum tl_packet_class  pkt_class,
-		enum tl_packet_type   pkt_type,
-		unsigned int          stream_id,
-		int                   numbered)
+static void kbasep_packet_header_setup(
+	char                  *buffer,
+	enum tl_packet_family pkt_family,
+	enum tl_packet_class  pkt_class,
+	enum tl_packet_type   pkt_type,
+	unsigned int          stream_id,
+	int                   numbered)
 {
-	u32 word0 = 0;
-	u32 word1 = 0;
-
-	KBASE_DEBUG_ASSERT(buffer);
-	KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
-	KBASE_DEBUG_ASSERT(
-			(pkt_type == TL_PACKET_TYPE_HEADER)  ||
-			(pkt_type == TL_PACKET_TYPE_SUMMARY) ||
-			(pkt_type == TL_PACKET_TYPE_BODY));
-	KBASE_DEBUG_ASSERT(
-			(pkt_class == TL_PACKET_CLASS_OBJ) ||
-			(pkt_class == TL_PACKET_CLASS_AUX));
-
-	kbasep_tlstream_put_bits(
-			&word0, pkt_family,
-			PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
-	kbasep_tlstream_put_bits(
-			&word0, pkt_class,
-			PACKET_CLASS_POS, PACKET_CLASS_LEN);
-	kbasep_tlstream_put_bits(
-			&word0, pkt_type,
-			PACKET_TYPE_POS, PACKET_TYPE_LEN);
-	kbasep_tlstream_put_bits(
-			&word0, stream_id,
-			PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
-
-	if (numbered)
-		kbasep_tlstream_put_bits(
-				&word1, 1,
-				PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
-
-	memcpy(&buffer[0],             &word0, sizeof(word0));
-	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
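+	/* The length field of the second header word is left at zero here;
+	 * it is filled in by kbasep_packet_header_update() once the payload
+	 * size is known.
+	 */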
+	u32 words[2] = {
+		MIPE_PACKET_HEADER_W0(pkt_family, pkt_class, pkt_type, stream_id),
+		MIPE_PACKET_HEADER_W1(0, !!numbered),
+	};
+	memcpy(buffer, words, sizeof(words));
 }
 
 /**
- * kbasep_tlstream_packet_header_update - update the packet header
+ * kbasep_packet_header_update - update the packet header
  * @buffer:    pointer to the buffer
  * @data_size: amount of data carried in this packet
+ * @numbered:   non-zero if the stream is numbered
  *
  * Function updates mutable part of packet header in the given buffer.
  * Note that value of data_size must not including size of the header.
  */
-static void kbasep_tlstream_packet_header_update(
-		char   *buffer,
-		size_t data_size)
+static void kbasep_packet_header_update(
+		char  *buffer,
+		size_t data_size,
+		int    numbered)
 {
 	u32 word0;
-	u32 word1;
+	u32 word1 = MIPE_PACKET_HEADER_W1((u32)data_size, !!numbered);
 
 	KBASE_DEBUG_ASSERT(buffer);
 	CSTD_UNUSED(word0);
 
-	memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
-
-	kbasep_tlstream_put_bits(
-			&word1, data_size,
-			PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
-
 	memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
 }
 
 /**
- * kbasep_tlstream_packet_number_update - update the packet number
+ * kbasep_packet_number_update - update the packet number
  * @buffer:  pointer to the buffer
  * @counter: value of packet counter for this packet's stream
  *
  * Function updates packet number embedded within the packet placed in the
  * given buffer.
  */
-static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+static void kbasep_packet_number_update(char *buffer, u32 counter)
 {
 	KBASE_DEBUG_ASSERT(buffer);
 
 	memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
 }
 
-/**
- * kbasep_timeline_stream_reset - reset stream
- * @stream:  pointer to the stream structure
- *
- * Function discards all pending messages and resets packet counters.
- */
-static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+void kbase_tlstream_reset(struct kbase_tlstream *stream)
 {
 	unsigned int i;
 
 	for (i = 0; i < PACKET_COUNT; i++) {
 		if (stream->numbered)
 			atomic_set(
-					&stream->buffer[i].size,
-					PACKET_HEADER_SIZE +
-					PACKET_NUMBER_SIZE);
+				&stream->buffer[i].size,
+				PACKET_HEADER_SIZE +
+				PACKET_NUMBER_SIZE);
 		else
 			atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
 	}
@@ -837,14 +106,26 @@
 	atomic_set(&stream->rbi, 0);
 }
 
-/**
- * kbasep_timeline_stream_init - initialize timeline stream
- * @stream:      pointer to the stream structure
- * @stream_type: stream type
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0).
  */
-static void kbasep_timeline_stream_init(
-		struct tl_stream    *stream,
-		enum tl_stream_type stream_type)
+static const struct {
+	enum tl_packet_family pkt_family;
+	enum tl_packet_class  pkt_class;
+	enum tl_packet_type   pkt_type;
+	unsigned int          stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY,    1},
+	{TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY,    1}
+};
+
+void kbase_tlstream_init(
+	struct kbase_tlstream *stream,
+	enum tl_stream_type    stream_type,
+	wait_queue_head_t     *ready_read)
 {
 	unsigned int i;
 
@@ -860,80 +141,69 @@
 		stream->numbered = 0;
 
 	for (i = 0; i < PACKET_COUNT; i++)
-		kbasep_tlstream_packet_header_setup(
-				stream->buffer[i].data,
-				tl_stream_cfg[stream_type].pkt_family,
-				tl_stream_cfg[stream_type].pkt_class,
-				tl_stream_cfg[stream_type].pkt_type,
-				tl_stream_cfg[stream_type].stream_id,
-				stream->numbered);
+		kbasep_packet_header_setup(
+			stream->buffer[i].data,
+			tl_stream_cfg[stream_type].pkt_family,
+			tl_stream_cfg[stream_type].pkt_class,
+			tl_stream_cfg[stream_type].pkt_type,
+			tl_stream_cfg[stream_type].stream_id,
+			stream->numbered);
 
-	kbasep_timeline_stream_reset(tl_stream[stream_type]);
+#if MALI_UNIT_TEST
+	atomic_set(&stream->bytes_generated, 0);
+#endif
+	stream->ready_read = ready_read;
+
+	kbase_tlstream_reset(stream);
 }
 
-/**
- * kbasep_timeline_stream_term - terminate timeline stream
- * @stream: pointer to the stream structure
- */
-static void kbasep_timeline_stream_term(struct tl_stream *stream)
+void kbase_tlstream_term(struct kbase_tlstream *stream)
 {
 	KBASE_DEBUG_ASSERT(stream);
 }
 
 /**
- * kbasep_tlstream_msgbuf_submit - submit packet to the user space
- * @stream:     pointer to the stream structure
- * @wb_idx_raw: write buffer index
- * @wb_size:    length of data stored in current buffer
+ * kbase_tlstream_msgbuf_submit - submit packet to user space
+ * @stream:     Pointer to the stream structure
+ * @wb_idx_raw: Write buffer index
+ * @wb_size:    Length of data stored in the current buffer
  *
- * Function updates currently written buffer with packet header. Then write
- * index is incremented and buffer is handled to user space. Parameters
- * of new buffer are returned using provided arguments.
+ * Updates the currently written buffer with the packet header.
+ * Then the write index is incremented and the buffer is handed to user space.
+ * Parameters of the new buffer are returned using the provided arguments.
  *
- * Return: length of data in new buffer
+ * Return: length of data in the new buffer
  *
- * Warning:  User must update the stream structure with returned value.
+ * Warning: the user must update the stream structure with the returned value.
  */
 static size_t kbasep_tlstream_msgbuf_submit(
-		struct tl_stream *stream,
+		struct kbase_tlstream *stream,
 		unsigned int      wb_idx_raw,
 		unsigned int      wb_size)
 {
-	unsigned int rb_idx_raw = atomic_read(&stream->rbi);
 	unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
 
 	/* Set stream as flushed. */
 	atomic_set(&stream->autoflush_counter, -1);
 
-	kbasep_tlstream_packet_header_update(
-			stream->buffer[wb_idx].data,
-			wb_size - PACKET_HEADER_SIZE);
+	kbasep_packet_header_update(
+		stream->buffer[wb_idx].data,
+		wb_size - PACKET_HEADER_SIZE,
+		stream->numbered);
 
 	if (stream->numbered)
-		kbasep_tlstream_packet_number_update(
-				stream->buffer[wb_idx].data,
-				wb_idx_raw);
+		kbasep_packet_number_update(
+			stream->buffer[wb_idx].data,
+			wb_idx_raw);
 
 	/* Increasing write buffer index will expose this packet to the reader.
 	 * As stream->lock is not taken on reader side we must make sure memory
 	 * is updated correctly before this will happen. */
 	smp_wmb();
-	wb_idx_raw++;
-	atomic_set(&stream->wbi, wb_idx_raw);
+	atomic_inc(&stream->wbi);
 
 	/* Inform user that packets are ready for reading. */
-	wake_up_interruptible(&tl_event_queue);
-
-	/* Detect and mark overflow in this stream. */
-	if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
-		/* Reader side depends on this increment to correctly handle
-		 * overflows. The value shall be updated only if it was not
-		 * modified by the reader. The data holding buffer will not be
-		 * updated before stream->lock is released, however size of the
-		 * buffer will. Make sure this increment is globally visible
-		 * before information about selected write buffer size. */
-		atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
-	}
+	wake_up_interruptible(stream->ready_read);
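+	/* Overflow (the writer lapping the reader) is detected on the reader
+	 * side, which advances the read index past any overwritten packets,
+	 * so no rbi adjustment is needed here.
+	 */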
 
 	wb_size = PACKET_HEADER_SIZE;
 	if (stream->numbered)
@@ -942,37 +212,18 @@
 	return wb_size;
 }
 
-/**
- * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
- * @stream_type: type of the stream that shall be locked
- * @msg_size:    message size
- * @flags:       pointer to store flags passed back on stream release
- *
- * Function will lock the stream and reserve the number of bytes requested
- * in msg_size for the user.
- *
- * Return: pointer to the buffer where message can be stored
- *
- * Warning: Stream must be released with kbasep_tlstream_msgbuf_release().
- *          Only atomic operations are allowed while stream is locked
- *          (i.e. do not use any operation that may sleep).
- */
-static char *kbasep_tlstream_msgbuf_acquire(
-		enum tl_stream_type stream_type,
-		size_t              msg_size,
-		unsigned long       *flags) __acquires(&stream->lock)
+char *kbase_tlstream_msgbuf_acquire(
+	struct kbase_tlstream *stream,
+	size_t              msg_size,
+	unsigned long       *flags) __acquires(&stream->lock)
 {
-	struct tl_stream *stream;
 	unsigned int     wb_idx_raw;
 	unsigned int     wb_idx;
 	size_t           wb_size;
 
-	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
 	KBASE_DEBUG_ASSERT(
-			PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
-			msg_size);
-
-	stream = tl_stream[stream_type];
+		PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+		msg_size);
 
 	spin_lock_irqsave(&stream->lock, *flags);
 
@@ -991,47 +242,25 @@
 	atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
 
 #if MALI_UNIT_TEST
-	atomic_add(msg_size, &tlstream_bytes_generated);
+	atomic_add(msg_size, &stream->bytes_generated);
 #endif /* MALI_UNIT_TEST */
 
 	return &stream->buffer[wb_idx].data[wb_size];
 }
 
-/**
- * kbasep_tlstream_msgbuf_release - unlock selected stream
- * @stream_type:  type of the stream that shall be locked
- * @flags:        value obtained during stream acquire
- *
- * Function releases stream that has been previously locked with a call to
- * kbasep_tlstream_msgbuf_acquire().
- */
-static void kbasep_tlstream_msgbuf_release(
-		enum tl_stream_type stream_type,
-		unsigned long       flags) __releases(&stream->lock)
+void kbase_tlstream_msgbuf_release(
+	struct kbase_tlstream *stream,
+	unsigned long       flags) __releases(&stream->lock)
 {
-	struct tl_stream *stream;
-
-	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
-
-	stream = tl_stream[stream_type];
-
 	/* Mark stream as containing unflushed data. */
 	atomic_set(&stream->autoflush_counter, 0);
 
 	spin_unlock_irqrestore(&stream->lock, flags);
 }
 
-/*****************************************************************************/
-
-/**
- * kbasep_tlstream_flush_stream - flush stream
- * @stype:  type of stream to be flushed
- *
- * Flush pending data in timeline stream.
- */
-static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+void kbase_tlstream_flush_stream(
+	struct kbase_tlstream *stream)
 {
-	struct tl_stream *stream = tl_stream[stype];
 	unsigned long    flags;
 	unsigned int     wb_idx_raw;
 	unsigned int     wb_idx;
@@ -1056,1583 +285,3 @@
 	spin_unlock_irqrestore(&stream->lock, flags);
 }
 
-/**
- * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
- * @timer: unused
- *
- * Timer is executed periodically to check if any of the stream contains
- * buffer ready to be submitted to user space.
- */
-static void kbasep_tlstream_autoflush_timer_callback(struct timer_list *timer)
-{
-	enum tl_stream_type stype;
-	int                 rcode;
-
-	CSTD_UNUSED(timer);
-
-	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
-		struct tl_stream *stream = tl_stream[stype];
-		unsigned long    flags;
-		unsigned int     wb_idx_raw;
-		unsigned int     wb_idx;
-		size_t           wb_size;
-		size_t           min_size = PACKET_HEADER_SIZE;
-
-		int af_cnt = atomic_read(&stream->autoflush_counter);
-
-		/* Check if stream contain unflushed data. */
-		if (0 > af_cnt)
-			continue;
-
-		/* Check if stream should be flushed now. */
-		if (af_cnt != atomic_cmpxchg(
-					&stream->autoflush_counter,
-					af_cnt,
-					af_cnt + 1))
-			continue;
-		if (!af_cnt)
-			continue;
-
-		/* Autoflush this stream. */
-		if (stream->numbered)
-			min_size += PACKET_NUMBER_SIZE;
-
-		spin_lock_irqsave(&stream->lock, flags);
-
-		wb_idx_raw = atomic_read(&stream->wbi);
-		wb_idx     = wb_idx_raw % PACKET_COUNT;
-		wb_size    = atomic_read(&stream->buffer[wb_idx].size);
-
-		if (wb_size > min_size) {
-			wb_size = kbasep_tlstream_msgbuf_submit(
-					stream, wb_idx_raw, wb_size);
-			wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
-			atomic_set(&stream->buffer[wb_idx].size,
-					wb_size);
-		}
-		spin_unlock_irqrestore(&stream->lock, flags);
-	}
-
-	if (atomic_read(&autoflush_timer_active))
-		rcode = mod_timer(
-				&autoflush_timer,
-				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
-	CSTD_UNUSED(rcode);
-}
-
-/**
- * kbasep_tlstream_packet_pending - check timeline streams for pending packets
- * @stype:      pointer to variable where stream type will be placed
- * @rb_idx_raw: pointer to variable where read buffer index will be placed
- *
- * Function checks all streams for pending packets. It will stop as soon as
- * packet ready to be submitted to user space is detected. Variables under
- * pointers, passed as the parameters to this function will be updated with
- * values pointing to right stream and buffer.
- *
- * Return: non-zero if any of timeline streams has at last one packet ready
- */
-static int kbasep_tlstream_packet_pending(
-		enum tl_stream_type *stype,
-		unsigned int        *rb_idx_raw)
-{
-	int pending = 0;
-
-	KBASE_DEBUG_ASSERT(stype);
-	KBASE_DEBUG_ASSERT(rb_idx_raw);
-
-	for (
-			*stype = 0;
-			(*stype < TL_STREAM_TYPE_COUNT) && !pending;
-			(*stype)++) {
-		if (NULL != tl_stream[*stype]) {
-			*rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
-			/* Read buffer index may be updated by writer in case of
-			 * overflow. Read and write buffer indexes must be
-			 * loaded in correct order. */
-			smp_rmb();
-			if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
-				pending = 1;
-		}
-	}
-	(*stype)--;
-
-	return pending;
-}
-
-/**
- * kbasep_tlstream_read - copy data from streams to buffer provided by user
- * @filp:   pointer to file structure (unused)
- * @buffer: pointer to the buffer provided by user
- * @size:   maximum amount of data that can be stored in the buffer
- * @f_pos:  pointer to file offset (unused)
- *
- * Return: number of bytes stored in the buffer
- */
-static ssize_t kbasep_tlstream_read(
-		struct file *filp,
-		char __user *buffer,
-		size_t      size,
-		loff_t      *f_pos)
-{
-	ssize_t copy_len = 0;
-
-	KBASE_DEBUG_ASSERT(filp);
-	KBASE_DEBUG_ASSERT(f_pos);
-
-	if (!buffer)
-		return -EINVAL;
-
-	if ((0 > *f_pos) || (PACKET_SIZE > size))
-		return -EINVAL;
-
-	mutex_lock(&tl_reader_lock);
-
-	while (copy_len < size) {
-		enum tl_stream_type stype;
-		unsigned int        rb_idx_raw = 0;
-		unsigned int        rb_idx;
-		size_t              rb_size;
-
-		/* If we don't have any data yet, wait for packet to be
-		 * submitted. If we already read some packets and there is no
-		 * packet pending return back to user. */
-		if (0 < copy_len) {
-			if (!kbasep_tlstream_packet_pending(
-						&stype,
-						&rb_idx_raw))
-				break;
-		} else {
-			if (wait_event_interruptible(
-						tl_event_queue,
-						kbasep_tlstream_packet_pending(
-							&stype,
-							&rb_idx_raw))) {
-				copy_len = -ERESTARTSYS;
-				break;
-			}
-		}
-
-		/* Check if this packet fits into the user buffer.
-		 * If so copy its content. */
-		rb_idx = rb_idx_raw % PACKET_COUNT;
-		rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
-		if (rb_size > size - copy_len)
-			break;
-		if (copy_to_user(
-					&buffer[copy_len],
-					tl_stream[stype]->buffer[rb_idx].data,
-					rb_size)) {
-			copy_len = -EFAULT;
-			break;
-		}
-
-		/* If the rbi still points to the packet we just processed
-		 * then there was no overflow so we add the copied size to
-		 * copy_len and move rbi on to the next packet
-		 */
-		smp_rmb();
-		if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
-			copy_len += rb_size;
-			atomic_inc(&tl_stream[stype]->rbi);
-
-#if MALI_UNIT_TEST
-			atomic_add(rb_size, &tlstream_bytes_collected);
-#endif /* MALI_UNIT_TEST */
-		}
-	}
-
-	mutex_unlock(&tl_reader_lock);
-
-	return copy_len;
-}
-
-/**
- * kbasep_tlstream_poll - poll timeline stream for packets
- * @filp: pointer to file structure
- * @wait: pointer to poll table
- * Return: POLLIN if data can be read without blocking, otherwise zero
- */
-static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
-{
-	enum tl_stream_type stream_type;
-	unsigned int        rb_idx;
-
-	KBASE_DEBUG_ASSERT(filp);
-	KBASE_DEBUG_ASSERT(wait);
-
-	poll_wait(filp, &tl_event_queue, wait);
-	if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
-		return POLLIN;
-	return 0;
-}
-
-/**
- * kbasep_tlstream_release - release timeline stream descriptor
- * @inode: pointer to inode structure
- * @filp:  pointer to file structure
- *
- * Return always return zero
- */
-static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
-{
-	KBASE_DEBUG_ASSERT(inode);
-	KBASE_DEBUG_ASSERT(filp);
-	CSTD_UNUSED(inode);
-	CSTD_UNUSED(filp);
-
-	/* Stop autoflush timer before releasing access to streams. */
-	atomic_set(&autoflush_timer_active, 0);
-	del_timer_sync(&autoflush_timer);
-
-	atomic_set(&kbase_tlstream_enabled, 0);
-	return 0;
-}
-
-/**
- * kbasep_tlstream_timeline_header - prepare timeline header stream packet
- * @stream_type: type of the stream that will carry header data
- * @tp_desc:     pointer to array with tracepoint descriptors
- * @tp_count:    number of descriptors in the given array
- *
- * Functions fills in information about tracepoints stored in body stream
- * associated with this header stream.
- */
-static void kbasep_tlstream_timeline_header(
-		enum tl_stream_type  stream_type,
-		const struct tp_desc *tp_desc,
-		u32                  tp_count)
-{
-	const u8      tv = SWTRACE_VERSION; /* protocol version */
-	const u8      ps = sizeof(void *); /* pointer size */
-	size_t        msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
-	char          *buffer;
-	size_t        pos = 0;
-	unsigned long flags;
-	unsigned int  i;
-
-	KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
-	KBASE_DEBUG_ASSERT(tp_desc);
-
-	/* Calculate the size of the timeline message. */
-	for (i = 0; i < tp_count; i++) {
-		msg_size += sizeof(tp_desc[i].id);
-		msg_size +=
-			strnlen(tp_desc[i].id_str,    STRLEN_MAX) +
-			sizeof(char) + sizeof(u32);
-		msg_size +=
-			strnlen(tp_desc[i].name,      STRLEN_MAX) +
-			sizeof(char) + sizeof(u32);
-		msg_size +=
-			strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
-			sizeof(char) + sizeof(u32);
-		msg_size +=
-			strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
-			sizeof(char) + sizeof(u32);
-	}
-
-	KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
-
-	buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &tp_count, sizeof(tp_count));
-
-	for (i = 0; i < tp_count; i++) {
-		pos = kbasep_tlstream_write_bytes(
-				buffer, pos,
-				&tp_desc[i].id, sizeof(tp_desc[i].id));
-		pos = kbasep_tlstream_write_string(
-				buffer, pos,
-				tp_desc[i].id_str, msg_size - pos);
-		pos = kbasep_tlstream_write_string(
-				buffer, pos,
-				tp_desc[i].name, msg_size - pos);
-		pos = kbasep_tlstream_write_string(
-				buffer, pos,
-				tp_desc[i].arg_types, msg_size - pos);
-		pos = kbasep_tlstream_write_string(
-				buffer, pos,
-				tp_desc[i].arg_names, msg_size - pos);
-	}
-
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(stream_type, flags);
-
-	/* We don't expect any more data to be read in this stream.
-	 * As header stream must be read before its associated body stream,
-	 * make this packet visible to the user straightaway. */
-	kbasep_tlstream_flush_stream(stream_type);
-}
-
-/*****************************************************************************/
-
-int kbase_tlstream_init(void)
-{
-	enum tl_stream_type i;
-
-	/* Prepare stream structures. */
-	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
-		tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
-		if (!tl_stream[i])
-			break;
-		kbasep_timeline_stream_init(tl_stream[i], i);
-	}
-	if (TL_STREAM_TYPE_COUNT > i) {
-		for (; i > 0; i--) {
-			kbasep_timeline_stream_term(tl_stream[i - 1]);
-			kfree(tl_stream[i - 1]);
-		}
-		return -ENOMEM;
-	}
-
-	/* Initialize autoflush timer. */
-	atomic_set(&autoflush_timer_active, 0);
-	kbase_timer_setup(&autoflush_timer,
-			  kbasep_tlstream_autoflush_timer_callback);
-
-	return 0;
-}
-
-void kbase_tlstream_term(void)
-{
-	enum tl_stream_type i;
-
-	for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
-		kbasep_timeline_stream_term(tl_stream[i]);
-		kfree(tl_stream[i]);
-	}
-}
-
-static void kbase_create_timeline_objects(struct kbase_context *kctx)
-{
-	struct kbase_device             *kbdev = kctx->kbdev;
-	unsigned int                    lpu_id;
-	unsigned int                    as_nr;
-	struct kbasep_kctx_list_element *element;
-
-	/* Create LPU objects. */
-	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
-		u32 *lpu =
-			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
-		KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, lpu_id, *lpu);
-	}
-
-	/* Create Address Space objects. */
-	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
-		KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(&kbdev->as[as_nr], as_nr);
-
-	/* Create GPU object and make it retain all LPUs and address spaces. */
-	KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(
-			kbdev,
-			kbdev->gpu_props.props.raw_props.gpu_id,
-			kbdev->gpu_props.num_cores);
-
-	for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
-		void *lpu =
-			&kbdev->gpu_props.props.raw_props.js_features[lpu_id];
-		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, kbdev);
-	}
-	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
-		KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(
-				&kbdev->as[as_nr],
-				kbdev);
-
-	/* Create object for each known context. */
-	mutex_lock(&kbdev->kctx_list_lock);
-	list_for_each_entry(element, &kbdev->kctx_list, link) {
-		KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(
-				element->kctx,
-				element->kctx->id,
-				(u32)(element->kctx->tgid));
-	}
-	/* Before releasing the lock, reset body stream buffers.
-	 * This will prevent context creation message to be directed to both
-	 * summary and body stream.
-	 */
-	kbase_tlstream_reset_body_streams();
-	mutex_unlock(&kbdev->kctx_list_lock);
-	/* Static object are placed into summary packet that needs to be
-	 * transmitted first. Flush all streams to make it available to
-	 * user space.
-	 */
-	kbase_tlstream_flush_streams();
-}
-
-int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags)
-{
-	int ret;
-	u32 tlstream_enabled = TLSTREAM_ENABLED | flags;
-
-	if (0 == atomic_cmpxchg(&kbase_tlstream_enabled, 0, tlstream_enabled)) {
-		int rcode;
-
-		ret = anon_inode_getfd(
-				"[mali_tlstream]",
-				&kbasep_tlstream_fops,
-				kctx,
-				O_RDONLY | O_CLOEXEC);
-		if (ret < 0) {
-			atomic_set(&kbase_tlstream_enabled, 0);
-			return ret;
-		}
-
-		/* Reset and initialize header streams. */
-		kbasep_timeline_stream_reset(
-				tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
-		kbasep_timeline_stream_reset(
-				tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
-		kbasep_timeline_stream_reset(
-				tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
-		kbasep_tlstream_timeline_header(
-				TL_STREAM_TYPE_OBJ_HEADER,
-				tp_desc_obj,
-				ARRAY_SIZE(tp_desc_obj));
-		kbasep_tlstream_timeline_header(
-				TL_STREAM_TYPE_AUX_HEADER,
-				tp_desc_aux,
-				ARRAY_SIZE(tp_desc_aux));
-
-		/* Start autoflush timer. */
-		atomic_set(&autoflush_timer_active, 1);
-		rcode = mod_timer(
-				&autoflush_timer,
-				jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
-		CSTD_UNUSED(rcode);
-
-		/* If job dumping is enabled, readjust the software event's
-		 * timeout as the default value of 3 seconds is often
-		 * insufficient. */
-		if (flags & BASE_TLSTREAM_JOB_DUMPING_ENABLED) {
-			dev_info(kctx->kbdev->dev,
-					"Job dumping is enabled, readjusting the software event's timeout\n");
-			atomic_set(&kctx->kbdev->js_data.soft_job_timeout_ms,
-					1800000);
-		}
-
-		/* Summary stream was cleared during acquire.
-		 * Create static timeline objects that will be
-		 * read by client.
-		 */
-		kbase_create_timeline_objects(kctx);
-
-	} else {
-		ret = -EBUSY;
-	}
-
-	return ret;
-}
-
-void kbase_tlstream_flush_streams(void)
-{
-	enum tl_stream_type stype;
-
-	for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
-		kbasep_tlstream_flush_stream(stype);
-}
-
-void kbase_tlstream_reset_body_streams(void)
-{
-	kbasep_timeline_stream_reset(
-			tl_stream[TL_STREAM_TYPE_OBJ]);
-	kbasep_timeline_stream_reset(
-			tl_stream[TL_STREAM_TYPE_AUX]);
-}
-
-#if MALI_UNIT_TEST
-void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
-{
-	KBASE_DEBUG_ASSERT(bytes_collected);
-	KBASE_DEBUG_ASSERT(bytes_generated);
-	*bytes_collected = atomic_read(&tlstream_bytes_collected);
-	*bytes_generated = atomic_read(&tlstream_bytes_generated);
-}
-#endif /* MALI_UNIT_TEST */
-
-/*****************************************************************************/
-
-void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid)
-{
-	const u32     msg_id = KBASE_TL_NEW_CTX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
-		sizeof(tgid);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ_SUMMARY,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &context, sizeof(context));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &nr, sizeof(nr));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &tgid, sizeof(tgid));
-
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
-}
-
-void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
-{
-	const u32     msg_id = KBASE_TL_NEW_GPU;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
-		sizeof(core_count);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ_SUMMARY,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &id, sizeof(id));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &core_count, sizeof(core_count));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
-}
-
-void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
-{
-	const u32     msg_id = KBASE_TL_NEW_LPU;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
-		sizeof(fn);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ_SUMMARY,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &nr, sizeof(nr));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &fn, sizeof(fn));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
-}
-
-void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
-{
-	const u32     msg_id = KBASE_TL_LIFELINK_LPU_GPU;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ_SUMMARY,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
-}
-
-void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr)
-{
-	const u32     msg_id = KBASE_TL_NEW_AS;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(nr);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ_SUMMARY,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &as, sizeof(as));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &nr, sizeof(nr));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
-}
-
-void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu)
-{
-	const u32     msg_id = KBASE_TL_LIFELINK_AS_GPU;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(gpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ_SUMMARY,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &as, sizeof(as));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
-}
-
-/*****************************************************************************/
-
-void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid)
-{
-	const u32     msg_id = KBASE_TL_NEW_CTX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr) +
-		sizeof(tgid);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &context, sizeof(context));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &nr, sizeof(nr));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &tgid, sizeof(tgid));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_new_atom(void *atom, u32 nr)
-{
-	const u32     msg_id = KBASE_TL_NEW_ATOM;
-	const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
-			sizeof(nr);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &nr, sizeof(nr));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_del_ctx(void *context)
-{
-	const u32     msg_id = KBASE_TL_DEL_CTX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(context);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &context, sizeof(context));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_del_atom(void *atom)
-{
-	const u32     msg_id = KBASE_TL_DEL_ATOM;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu)
-{
-	const u32     msg_id = KBASE_TL_RET_CTX_LPU;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &context, sizeof(context));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
-{
-	const u32     msg_id = KBASE_TL_RET_ATOM_CTX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &context, sizeof(context));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_ret_atom_lpu(
-		void *atom, void *lpu, const char *attrib_match_list)
-{
-	const u32     msg_id = KBASE_TL_RET_ATOM_LPU;
-	const size_t  msg_s0 = sizeof(u32) + sizeof(char) +
-			strnlen(attrib_match_list, STRLEN_MAX);
-	const size_t  msg_size =
-			sizeof(msg_id) + sizeof(u64) +
-			sizeof(atom) + sizeof(lpu) + msg_s0;
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
-	pos = kbasep_tlstream_write_string(
-			buffer, pos, attrib_match_list, msg_s0);
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu)
-{
-	const u32     msg_id = KBASE_TL_NRET_CTX_LPU;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(lpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &context, sizeof(context));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
-{
-	const u32     msg_id = KBASE_TL_NRET_ATOM_CTX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &context, sizeof(context));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2)
-{
-	const u32     msg_id = KBASE_TL_DEP_ATOM_ATOM;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom1, sizeof(atom1));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom2, sizeof(atom2));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2)
-{
-	const u32     msg_id = KBASE_TL_NDEP_ATOM_ATOM;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom1, sizeof(atom1));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom2, sizeof(atom2));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2)
-{
-	const u32     msg_id = KBASE_TL_RDEP_ATOM_ATOM;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom1) + sizeof(atom2);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom1, sizeof(atom1));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom2, sizeof(atom2));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
-{
-	const u32     msg_id = KBASE_TL_NRET_ATOM_LPU;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx)
-{
-	const u32     msg_id = KBASE_TL_RET_AS_CTX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &as, sizeof(as));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &ctx, sizeof(ctx));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx)
-{
-	const u32     msg_id = KBASE_TL_NRET_AS_CTX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(as) + sizeof(ctx);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &as, sizeof(as));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &ctx, sizeof(ctx));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as)
-{
-	const u32     msg_id = KBASE_TL_RET_ATOM_AS;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &as, sizeof(as));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as)
-{
-	const u32     msg_id = KBASE_TL_NRET_ATOM_AS;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(as);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &as, sizeof(as));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_config(
-		void *atom, u64 jd, u64 affinity, u32 config)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) +
-		sizeof(jd) + sizeof(affinity) + sizeof(config);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &jd, sizeof(jd));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &affinity, sizeof(affinity));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &config, sizeof(config));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(prio);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &prio, sizeof(prio));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(state);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &state, sizeof(state));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_jit(
-		void *atom, u64 edit_addr, u64 new_addr)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom)
-		+ sizeof(edit_addr) + sizeof(new_addr);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &edit_addr, sizeof(edit_addr));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &new_addr, sizeof(new_addr));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_attrib_as_config(
-		void *as, u64 transtab, u64 memattr, u64 transcfg)
-{
-	const u32     msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(as) +
-		sizeof(transtab) + sizeof(memattr) + sizeof(transcfg);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &as, sizeof(as));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &transtab, sizeof(transtab));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &memattr, sizeof(memattr));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &transcfg, sizeof(transcfg));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_event_lpu_softstop(void *lpu)
-{
-	const u32     msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(lpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &lpu, sizeof(lpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom)
-{
-	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom)
-{
-	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_event_atom_softjob_start(void *atom)
-{
-	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_tl_event_atom_softjob_end(void *atom)
-{
-	const u32     msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(atom);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &atom, sizeof(atom));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-void __kbase_tlstream_jd_gpu_soft_reset(void *gpu)
-{
-	const u32     msg_id = KBASE_JD_GPU_SOFT_RESET;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_OBJ,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
-}
-
-/*****************************************************************************/
-
-void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
-{
-	const u32     msg_id = KBASE_AUX_PM_STATE;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
-		sizeof(state);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &core_type, sizeof(core_type));
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-
-void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change)
-{
-	const u32     msg_id = KBASE_AUX_PAGEFAULT;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
-		sizeof(page_count_change);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX, msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos,
-			&page_count_change, sizeof(page_count_change));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-
-void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count)
-{
-	const u32     msg_id = KBASE_AUX_PAGESALLOC;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(ctx_nr) +
-		sizeof(page_count);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX, msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &ctx_nr, sizeof(ctx_nr));
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &page_count, sizeof(page_count));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-
-void __kbase_tlstream_aux_devfreq_target(u64 target_freq)
-{
-	const u32       msg_id = KBASE_AUX_DEVFREQ_TARGET;
-	const size_t    msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(target_freq);
-	unsigned long   flags;
-	char            *buffer;
-	size_t          pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX, msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &target_freq, sizeof(target_freq));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-
-void __kbase_tlstream_aux_protected_enter_start(void *gpu)
-{
-	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_START;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-void __kbase_tlstream_aux_protected_enter_end(void *gpu)
-{
-	const u32     msg_id = KBASE_AUX_PROTECTED_ENTER_END;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-
-void __kbase_tlstream_aux_protected_leave_start(void *gpu)
-{
-	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
-void __kbase_tlstream_aux_protected_leave_end(void *gpu)
-{
-	const u32     msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
-	const size_t  msg_size =
-		sizeof(msg_id) + sizeof(u64) + sizeof(gpu);
-	unsigned long flags;
-	char          *buffer;
-	size_t        pos = 0;
-
-	buffer = kbasep_tlstream_msgbuf_acquire(
-			TL_STREAM_TYPE_AUX,
-			msg_size, &flags);
-	KBASE_DEBUG_ASSERT(buffer);
-
-	pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
-	pos = kbasep_tlstream_write_timestamp(buffer, pos);
-	pos = kbasep_tlstream_write_bytes(
-			buffer, pos, &gpu, sizeof(gpu));
-	KBASE_DEBUG_ASSERT(msg_size == pos);
-
-	kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
-}
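
For reference, every __kbase_tlstream_* writer removed above follows the
same sequence: size the message from its arguments, acquire a buffer on the
target stream, write the message id, a 64-bit timestamp and the arguments,
then release the stream. A condensed, commented restatement of that shared
shape using the old helpers (the function name and msg_id value below are
placeholders, not a real tracepoint):

	/* Placeholder writer; restates the pattern of the functions above. */
	static void __kbase_tlstream_example_writer(void *obj)
	{
		const u32     msg_id = 0; /* each writer uses its own KBASE_* id */
		const size_t  msg_size = sizeof(msg_id) + sizeof(u64) + sizeof(obj);
		unsigned long flags;
		char          *buffer;
		size_t        pos = 0;

		/* Lock the stream and reserve msg_size bytes in one of its packets. */
		buffer = kbasep_tlstream_msgbuf_acquire(
				TL_STREAM_TYPE_OBJ, msg_size, &flags);
		KBASE_DEBUG_ASSERT(buffer);

		/* Serialize: message id, timestamp, then the arguments. */
		pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
		pos = kbasep_tlstream_write_timestamp(buffer, pos);
		pos = kbasep_tlstream_write_bytes(buffer, pos, &obj, sizeof(obj));
		KBASE_DEBUG_ASSERT(msg_size == pos);

		/* Unlock; the packet is drained by the reader or the autoflush timer. */
		kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
	}
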
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
index bfa25d9..5797738 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,622 +23,145 @@
 #if !defined(_KBASE_TLSTREAM_H)
 #define _KBASE_TLSTREAM_H
 
-#include <mali_kbase.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
 
-/*****************************************************************************/
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE        4096 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#if defined(CONFIG_MALI_JOB_DUMP) || defined(CONFIG_MALI_VECTOR_DUMP)
+	#define PACKET_COUNT       64
+#else
+	#define PACKET_COUNT       32
+#endif
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX         64 /* bytes */
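
(For reference, these defaults give each stream PACKET_COUNT * PACKET_SIZE =
32 * 4096 bytes = 128 KiB of packet storage, or 64 * 4096 bytes = 256 KiB
when CONFIG_MALI_JOB_DUMP or CONFIG_MALI_VECTOR_DUMP is enabled.)
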
 
 /**
- * kbase_tlstream_init - initialize timeline infrastructure in kernel
- * Return: zero on success, negative number on error
- */
-int kbase_tlstream_init(void);
-
-/**
- * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ * struct kbase_tlstream - timeline stream structure
+ * @lock:              Message order lock
+ * @buffer:            Array of buffers
+ * @wbi:               Write buffer index
+ * @rbi:               Read buffer index
+ * @numbered:          If non-zero, the stream's packets are sequentially numbered
+ * @autoflush_counter: Counter tracking stream's autoflush state
+ * @ready_read:        Pointer to a wait queue, which is signaled when
+ *                     timeline messages are ready for collection.
+ * @bytes_generated:   Number of bytes generated by tracepoint messages
  *
- * Timeline need have to been previously enabled with kbase_tlstream_init().
- */
-void kbase_tlstream_term(void);
-
-/**
- * kbase_tlstream_acquire - acquire timeline stream file descriptor
- * @kctx:  kernel common context
- * @flags: timeline stream flags
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream.
  *
- * This descriptor is meant to be used by userspace timeline to gain access to
- * kernel timeline stream. This stream is later broadcasted by user space to the
- * timeline client.
- * Only one entity can own the descriptor at any given time. Descriptor shall be
- * closed if unused. If descriptor cannot be obtained (i.e. when it is already
- * being used) return will be a negative value.
+ * Each message in the sequence must bear a timestamp that is greater
+ * than that of the previous message in the same stream. For this reason
+ * a lock is held throughout the process of message creation.
  *
- * Return: file descriptor on success, negative number on error
- */
-int kbase_tlstream_acquire(struct kbase_context *kctx, u32 flags);
-
-/**
- * kbase_tlstream_flush_streams - flush timeline streams.
+ * Each stream contains a set of buffers. Each buffer will hold one MIPE
+ * packet. If there is not enough free space to store an incoming
+ * message, the oldest buffer is discarded. Each packet in the timeline
+ * body stream has an embedded sequence number; this value must increment
+ * monotonically and is used by the packet receiver to detect any
+ * buffer overflows.
  *
- * Function will flush pending data in all timeline streams.
+ * The autoflush counter is set to a negative number when there is no data
+ * pending for flush and it is set to zero on every update of the buffer. The
+ * autoflush timer will increment the counter by one on every expiry. If there
+ * is no activity on the buffer for two consecutive timer expiries, the stream
+ * buffer will be flushed.
  */
-void kbase_tlstream_flush_streams(void);
+struct kbase_tlstream {
+	spinlock_t lock;
 
-/**
- * kbase_tlstream_reset_body_streams - reset timeline body streams.
- *
- * Function will discard pending data in all timeline body streams.
- */
-void kbase_tlstream_reset_body_streams(void);
+	struct {
+		atomic_t size;              /* number of bytes in buffer */
+		char     data[PACKET_SIZE]; /* buffer's data */
+	} buffer[PACKET_COUNT];
 
+	atomic_t wbi;
+	atomic_t rbi;
+
+	int      numbered;
+	atomic_t autoflush_counter;
+	wait_queue_head_t *ready_read;
 #if MALI_UNIT_TEST
+	atomic_t bytes_generated;
+#endif
+};
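
The autoflush behaviour described in the comment above reduces to a small
per-expiry rule. The sketch below only illustrates that documented rule; the
helper name is hypothetical, the flush call assumes the signature suggested
by the kbase_tlstream_flush_stream() kerneldoc later in this header, and the
real r20 timer callback may be structured differently:

	/* Illustration only: the documented autoflush rule. */
	static void autoflush_tick_example(struct kbase_tlstream *stream)
	{
		int count = atomic_read(&stream->autoflush_counter);

		if (count < 0)
			return;		/* negative: no data pending, nothing to do */

		if (count == 0) {
			/* First expiry since the last buffer update: arm the flush. */
			atomic_cmpxchg(&stream->autoflush_counter, 0, 1);
			return;
		}

		/* Second consecutive expiry with no new writes: flush the stream. */
		kbase_tlstream_flush_stream(stream);
	}
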
+
+/* Types of streams generated by timeline. */
+enum tl_stream_type {
+	TL_STREAM_TYPE_FIRST,
+	TL_STREAM_TYPE_OBJ_SUMMARY = TL_STREAM_TYPE_FIRST,
+	TL_STREAM_TYPE_OBJ,
+	TL_STREAM_TYPE_AUX,
+
+	TL_STREAM_TYPE_COUNT
+};
+
 /**
- * kbase_tlstream_test - start timeline stream data generator
- * @tpw_count: number of trace point writers in each context
- * @msg_delay: time delay in milliseconds between trace points written by one
- *             writer
- * @msg_count: number of trace points written by one writer
- * @aux_msg:   if non-zero aux messages will be included
+ * kbase_tlstream_init - initialize timeline stream
+ * @stream:      Pointer to the stream structure
+ * @stream_type: Stream type
+ * @ready_read:  Pointer to a wait queue to signal when
+ *               timeline messages are ready for collection.
+ */
+void kbase_tlstream_init(struct kbase_tlstream *stream,
+	enum tl_stream_type stream_type,
+	wait_queue_head_t  *ready_read);
+
+/**
+ * kbase_tlstream_term - terminate timeline stream
+ * @stream: Pointer to the stream structure
+ */
+void kbase_tlstream_term(struct kbase_tlstream *stream);
+
+/**
+ * kbase_tlstream_reset - reset stream
+ * @stream:    Pointer to the stream structure
  *
- * This test starts a requested number of asynchronous writers in both IRQ and
- * thread context. Each writer will generate required number of test
- * tracepoints (tracepoints with embedded information about writer that
- * should be verified by user space reader). Tracepoints will be emitted in
- * all timeline body streams. If aux_msg is non-zero writer will also
- * generate not testable tracepoints (tracepoints without information about
- * writer). These tracepoints are used to check correctness of remaining
- * timeline message generating functions. Writer will wait requested time
- * between generating another set of messages. This call blocks until all
- * writers finish.
+ * Function discards all pending messages and resets packet counters.
  */
-void kbase_tlstream_test(
-		unsigned int tpw_count,
-		unsigned int msg_delay,
-		unsigned int msg_count,
-		int          aux_msg);
+void kbase_tlstream_reset(struct kbase_tlstream *stream);
 
 /**
- * kbase_tlstream_stats - read timeline stream statistics
- * @bytes_collected: will hold number of bytes read by the user
- * @bytes_generated: will hold number of bytes generated by trace points
- */
-void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
-#endif /* MALI_UNIT_TEST */
-
-/*****************************************************************************/
-
-#define TL_ATOM_STATE_IDLE 0
-#define TL_ATOM_STATE_READY 1
-#define TL_ATOM_STATE_DONE 2
-#define TL_ATOM_STATE_POSTED 3
-
-void __kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr, u32 tgid);
-void __kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
-void __kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
-void __kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
-void __kbase_tlstream_tl_summary_new_as(void *as, u32 nr);
-void __kbase_tlstream_tl_summary_lifelink_as_gpu(void *as, void *gpu);
-void __kbase_tlstream_tl_new_ctx(void *context, u32 nr, u32 tgid);
-void __kbase_tlstream_tl_new_atom(void *atom, u32 nr);
-void __kbase_tlstream_tl_del_ctx(void *context);
-void __kbase_tlstream_tl_del_atom(void *atom);
-void __kbase_tlstream_tl_ret_ctx_lpu(void *context, void *lpu);
-void __kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
-void __kbase_tlstream_tl_ret_atom_lpu(
-		void *atom, void *lpu, const char *attrib_match_list);
-void __kbase_tlstream_tl_nret_ctx_lpu(void *context, void *lpu);
-void __kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
-void __kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
-void __kbase_tlstream_tl_ret_as_ctx(void *as, void *ctx);
-void __kbase_tlstream_tl_nret_as_ctx(void *as, void *ctx);
-void __kbase_tlstream_tl_ret_atom_as(void *atom, void *as);
-void __kbase_tlstream_tl_nret_atom_as(void *atom, void *as);
-void __kbase_tlstream_tl_dep_atom_atom(void *atom1, void *atom2);
-void __kbase_tlstream_tl_ndep_atom_atom(void *atom1, void *atom2);
-void __kbase_tlstream_tl_rdep_atom_atom(void *atom1, void *atom2);
-void __kbase_tlstream_tl_attrib_atom_config(
-		void *atom, u64 jd, u64 affinity, u32 config);
-void __kbase_tlstream_tl_attrib_atom_priority(void *atom, u32 prio);
-void __kbase_tlstream_tl_attrib_atom_state(void *atom, u32 state);
-void __kbase_tlstream_tl_attrib_atom_prioritized(void *atom);
-void __kbase_tlstream_tl_attrib_atom_jit(
-		void *atom, u64 edit_addr, u64 new_addr);
-void __kbase_tlstream_tl_attrib_as_config(
-		void *as, u64 transtab, u64 memattr, u64 transcfg);
-void __kbase_tlstream_tl_event_atom_softstop_ex(void *atom);
-void __kbase_tlstream_tl_event_lpu_softstop(void *lpu);
-void __kbase_tlstream_tl_event_atom_softstop_issue(void *atom);
-void __kbase_tlstream_tl_event_atom_softjob_start(void *atom);
-void __kbase_tlstream_tl_event_atom_softjob_end(void *atom);
-void __kbase_tlstream_jd_gpu_soft_reset(void *gpu);
-void __kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
-void __kbase_tlstream_aux_pagefault(u32 ctx_nr, u64 page_count_change);
-void __kbase_tlstream_aux_pagesalloc(u32 ctx_nr, u64 page_count);
-void __kbase_tlstream_aux_devfreq_target(u64 target_freq);
-void __kbase_tlstream_aux_protected_enter_start(void *gpu);
-void __kbase_tlstream_aux_protected_enter_end(void *gpu);
-void __kbase_tlstream_aux_protected_leave_start(void *gpu);
-void __kbase_tlstream_aux_protected_leave_end(void *gpu);
-
-#define TLSTREAM_ENABLED (1 << 31)
-
-extern atomic_t kbase_tlstream_enabled;
-
-#define __TRACE_IF_ENABLED(trace_name, ...)                         \
-	do {                                                        \
-		int enabled = atomic_read(&kbase_tlstream_enabled); \
-		if (enabled & TLSTREAM_ENABLED)                     \
-			__kbase_tlstream_##trace_name(__VA_ARGS__); \
-	} while (0)
-
-#define __TRACE_IF_ENABLED_LATENCY(trace_name, ...)                     \
-	do {                                                            \
-		int enabled = atomic_read(&kbase_tlstream_enabled);     \
-		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
-			__kbase_tlstream_##trace_name(__VA_ARGS__);     \
-	} while (0)
-
-#define __TRACE_IF_ENABLED_JD(trace_name, ...)                      \
-	do {                                                        \
-		int enabled = atomic_read(&kbase_tlstream_enabled); \
-		if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED)    \
-			__kbase_tlstream_##trace_name(__VA_ARGS__); \
-	} while (0)
-
-/*****************************************************************************/
-
-/**
- * KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX - create context object in timeline
- *                                     summary
- * @context: name of the context object
- * @nr:      context number
- * @tgid:    thread Group Id
+ * kbase_tlstream_msgbuf_acquire - lock selected stream and reserve a buffer
+ * @stream:      Pointer to the stream structure
+ * @msg_size:    Message size
+ * @flags:       Pointer to store flags passed back on stream release
  *
- * Function emits a timeline message informing about context creation. Context
- * is created with context number (its attribute), that can be used to link
- * kbase context with userspace context.
- * This message is directed to timeline summary stream.
- */
-#define KBASE_TLSTREAM_TL_SUMMARY_NEW_CTX(context, nr, tgid) \
-	__TRACE_IF_ENABLED(tl_summary_new_ctx, context, nr, tgid)
-
-/**
- * KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU - create GPU object in timeline summary
- * @gpu:        name of the GPU object
- * @id:         id value of this GPU
- * @core_count: number of cores this GPU hosts
+ * Lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
  *
- * Function emits a timeline message informing about GPU creation. GPU is
- * created with two attributes: id and core count.
- * This message is directed to timeline summary stream.
- */
-#define KBASE_TLSTREAM_TL_SUMMARY_NEW_GPU(gpu, id, core_count) \
-	__TRACE_IF_ENABLED(tl_summary_new_gpu, gpu, id, core_count)
-
-/**
- * KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU - create LPU object in timeline summary
- * @lpu: name of the Logical Processing Unit object
- * @nr:  sequential number assigned to this LPU
- * @fn:  property describing this LPU's functional abilities
+ * Return: pointer to the buffer where a message can be stored
  *
- * Function emits a timeline message informing about LPU creation. LPU is
- * created with two attributes: number linking this LPU with GPU's job slot
- * and function bearing information about this LPU abilities.
- * This message is directed to timeline summary stream.
+ * Warning: The stream must be released with kbase_tlstream_msgbuf_release().
+ *          Only atomic operations are allowed while the stream is locked
+ *          (i.e. do not use any operation that may sleep).
  */
-#define KBASE_TLSTREAM_TL_SUMMARY_NEW_LPU(lpu, nr, fn) \
-	__TRACE_IF_ENABLED(tl_summary_new_lpu, lpu, nr, fn)
+char *kbase_tlstream_msgbuf_acquire(struct kbase_tlstream *stream,
+	size_t msg_size, unsigned long *flags) __acquires(&stream->lock);
 
 /**
- * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU - lifelink LPU object to GPU
- * @lpu: name of the Logical Processing Unit object
- * @gpu: name of the GPU object
+ * kbase_tlstream_msgbuf_release - unlock selected stream
+ * @stream:    Pointer to the stream structure
+ * @flags:     Value obtained during stream acquire
  *
- * Function emits a timeline message informing that LPU object shall be deleted
- * along with GPU object.
- * This message is directed to timeline summary stream.
+ * Release the stream that has been previously
+ * locked with a call to kbase_tlstream_msgbuf_acquire().
  */
-#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_LPU_GPU(lpu, gpu) \
-	__TRACE_IF_ENABLED(tl_summary_lifelink_lpu_gpu, lpu, gpu)
+void kbase_tlstream_msgbuf_release(struct kbase_tlstream *stream,
+	unsigned long flags) __releases(&stream->lock);
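
Taken together, the acquire/release pair above defines the writer-side
contract: reserve space under the stream lock, serialize, release. A minimal
usage sketch under that contract (the function name is hypothetical and the
memcpy-based serialization is a simplification; real writers also emit a
message id and timestamp, as the removed .c code shows):

	/* Hypothetical writer illustrating the acquire/release contract. */
	static void write_u32_event_example(struct kbase_tlstream *stream, u32 value)
	{
		const size_t  msg_size = sizeof(value);
		unsigned long flags;
		char          *buffer;

		/* Takes stream->lock and reserves msg_size bytes in a packet. */
		buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &flags);
		KBASE_DEBUG_ASSERT(buffer);

		/* Only non-sleeping operations are allowed while the lock is held. */
		memcpy(buffer, &value, sizeof(value));

		/* Drops stream->lock; the packet becomes eligible for autoflush. */
		kbase_tlstream_msgbuf_release(stream, flags);
	}
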
 
 /**
- * KBASE_TLSTREAM_TL_SUMMARY_NEW_AS - create address space object in timeline summary
- * @as: name of the address space object
- * @nr: sequential number assigned to this address space
+ * kbase_tlstream_flush_stream - flush stream
+ * @stream:     Pointer to the stream structure
  *
- * Function emits a timeline message informing about address space creation.
- * Address space is created with one attribute: number identifying this
- * address space.
- * This message is directed to timeline summary stream.
+ * Flush pending data in the timeline stream.
  */
-#define KBASE_TLSTREAM_TL_SUMMARY_NEW_AS(as, nr) \
-	__TRACE_IF_ENABLED(tl_summary_new_as, as, nr)
-
-/**
- * KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU - lifelink address space object to GPU
- * @as:  name of the address space object
- * @gpu: name of the GPU object
- *
- * Function emits a timeline message informing that address space object
- * shall be deleted along with GPU object.
- * This message is directed to timeline summary stream.
- */
-#define KBASE_TLSTREAM_TL_SUMMARY_LIFELINK_AS_GPU(as, gpu) \
-	__TRACE_IF_ENABLED(tl_summary_lifelink_as_gpu, as, gpu)
-
-/**
- * KBASE_TLSTREAM_TL_NEW_CTX - create context object in timeline
- * @context: name of the context object
- * @nr:      context number
- * @tgid:    thread Group Id
- *
- * Function emits a timeline message informing about context creation. Context
- * is created with context number (its attribute), that can be used to link
- * kbase context with userspace context.
- */
-#define KBASE_TLSTREAM_TL_NEW_CTX(context, nr, tgid) \
-	__TRACE_IF_ENABLED(tl_new_ctx, context, nr, tgid)
-
-/**
- * KBASE_TLSTREAM_TL_NEW_ATOM - create atom object in timeline
- * @atom: name of the atom object
- * @nr:   sequential number assigned to this atom
- *
- * Function emits a timeline message informing about atom creation. Atom is
- * created with atom number (its attribute) that links it with actual work
- * bucket id understood by hardware.
- */
-#define KBASE_TLSTREAM_TL_NEW_ATOM(atom, nr) \
-	__TRACE_IF_ENABLED(tl_new_atom, atom, nr)
-
-/**
- * KBASE_TLSTREAM_TL_DEL_CTX - destroy context object in timeline
- * @context: name of the context object
- *
- * Function emits a timeline message informing that context object ceased to
- * exist.
- */
-#define KBASE_TLSTREAM_TL_DEL_CTX(context) \
-	__TRACE_IF_ENABLED(tl_del_ctx, context)
-
-/**
- * KBASE_TLSTREAM_TL_DEL_ATOM - destroy atom object in timeline
- * @atom: name of the atom object
- *
- * Function emits a timeline message informing that atom object ceased to
- * exist.
- */
-#define KBASE_TLSTREAM_TL_DEL_ATOM(atom) \
-	__TRACE_IF_ENABLED(tl_del_atom, atom)
-
-/**
- * KBASE_TLSTREAM_TL_RET_CTX_LPU - retain context by LPU
- * @context: name of the context object
- * @lpu:     name of the Logical Processing Unit object
- *
- * Function emits a timeline message informing that context is being held
- * by LPU and must not be deleted unless it is released.
- */
-#define KBASE_TLSTREAM_TL_RET_CTX_LPU(context, lpu) \
-	__TRACE_IF_ENABLED(tl_ret_ctx_lpu, context, lpu)
-
-/**
- * KBASE_TLSTREAM_TL_RET_ATOM_CTX - retain atom by context
- * @atom:    name of the atom object
- * @context: name of the context object
- *
- * Function emits a timeline message informing that atom object is being held
- * by context and must not be deleted unless it is released.
- */
-#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(atom, context) \
-	__TRACE_IF_ENABLED(tl_ret_atom_ctx, atom, context)
-
-/**
- * KBASE_TLSTREAM_TL_RET_ATOM_LPU - retain atom by LPU
- * @atom:              name of the atom object
- * @lpu:               name of the Logical Processing Unit object
- * @attrib_match_list: list containing match operator attributes
- *
- * Function emits a timeline message informing that atom object is being held
- * by LPU and must not be deleted unless it is released.
- */
-#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(atom, lpu, attrib_match_list) \
-	__TRACE_IF_ENABLED(tl_ret_atom_lpu, atom, lpu, attrib_match_list)
-
-/**
- * KBASE_TLSTREAM_TL_NRET_CTX_LPU - release context by LPU
- * @context: name of the context object
- * @lpu:     name of the Logical Processing Unit object
- *
- * Function emits a timeline message informing that context is being released
- * by LPU object.
- */
-#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(context, lpu) \
-	__TRACE_IF_ENABLED(tl_nret_ctx_lpu, context, lpu)
-
-/**
- * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - release atom by context
- * @atom:    name of the atom object
- * @context: name of the context object
- *
- * Function emits a timeline message informing that atom object is being
- * released by context.
- */
-#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(atom, context) \
-	__TRACE_IF_ENABLED(tl_nret_atom_ctx, atom, context)
-
-/**
- * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - release atom by LPU
- * @atom: name of the atom object
- * @lpu:  name of the Logical Processing Unit object
- *
- * Function emits a timeline message informing that atom object is being
- * released by LPU.
- */
-#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(atom, lpu) \
-	__TRACE_IF_ENABLED(tl_nret_atom_lpu, atom, lpu)
-
-/**
- * KBASE_TLSTREAM_TL_RET_AS_CTX - lifelink address space object to context
- * @as:  name of the address space object
- * @ctx: name of the context object
- *
- * Function emits a timeline message informing that address space object
- * is being held by the context object.
- */
-#define KBASE_TLSTREAM_TL_RET_AS_CTX(as, ctx) \
-	__TRACE_IF_ENABLED(tl_ret_as_ctx, as, ctx)
-
-/**
- * KBASE_TLSTREAM_TL_NRET_AS_CTX - release address space by context
- * @as:  name of the address space object
- * @ctx: name of the context object
- *
- * Function emits a timeline message informing that address space object
- * is being released by atom.
- */
-#define KBASE_TLSTREAM_TL_NRET_AS_CTX(as, ctx) \
-	__TRACE_IF_ENABLED(tl_nret_as_ctx, as, ctx)
-
-/**
- * KBASE_TLSTREAM_TL_RET_ATOM_AS - retain atom by address space
- * @atom: name of the atom object
- * @as:   name of the address space object
- *
- * Function emits a timeline message informing that atom object is being held
- * by address space and must not be deleted unless it is released.
- */
-#define KBASE_TLSTREAM_TL_RET_ATOM_AS(atom, as) \
-	__TRACE_IF_ENABLED(tl_ret_atom_as, atom, as)
-
-/**
- * KBASE_TLSTREAM_TL_NRET_ATOM_AS - release atom by address space
- * @atom: name of the atom object
- * @as:   name of the address space object
- *
- * Function emits a timeline message informing that atom object is being
- * released by address space.
- */
-#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(atom, as) \
-	__TRACE_IF_ENABLED(tl_nret_atom_as, atom, as)
-
-/**
- * KBASE_TLSTREAM_TL_DEP_ATOM_ATOM - parent atom depends on child atom
- * @atom1: name of the child atom object
- * @atom2: name of the parent atom object that depends on child atom
- *
- * Function emits a timeline message informing that parent atom waits for
- * child atom object to be completed before start its execution.
- */
-#define KBASE_TLSTREAM_TL_DEP_ATOM_ATOM(atom1, atom2) \
-	__TRACE_IF_ENABLED(tl_dep_atom_atom, atom1, atom2)
-
-/**
- * KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM - dependency between atoms resolved
- * @atom1: name of the child atom object
- * @atom2: name of the parent atom object that depended on child atom
- *
- * Function emits a timeline message informing that parent atom execution
- * dependency on child atom has been resolved.
- */
-#define KBASE_TLSTREAM_TL_NDEP_ATOM_ATOM(atom1, atom2) \
-	__TRACE_IF_ENABLED(tl_ndep_atom_atom, atom1, atom2)
-
-/**
- * KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM - information about already resolved dependency between atoms
- * @atom1: name of the child atom object
- * @atom2: name of the parent atom object that depended on child atom
- *
- * Function emits a timeline message informing that parent atom execution
- * dependency on child atom has been resolved.
- */
-#define KBASE_TLSTREAM_TL_RDEP_ATOM_ATOM(atom1, atom2) \
-	__TRACE_IF_ENABLED(tl_rdep_atom_atom, atom1, atom2)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
- * @atom:     name of the atom object
- * @jd:       job descriptor address
- * @affinity: job affinity
- * @config:   job config
- *
- * Function emits a timeline message containing atom attributes.
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(atom, jd, affinity, config) \
-	__TRACE_IF_ENABLED(tl_attrib_atom_config, atom, jd, affinity, config)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
- * @atom: name of the atom object
- * @prio: atom priority
- *
- * Function emits a timeline message containing atom priority.
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(atom, prio) \
-	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_priority, atom, prio)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
- * @atom:  name of the atom object
- * @state: atom state
- *
- * Function emits a timeline message containing atom state.
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(atom, state) \
-	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_state, atom, state)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom was prioritized
- * @atom:  name of the atom object
- *
- * Function emits a timeline message signalling priority change
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(atom) \
-	__TRACE_IF_ENABLED_LATENCY(tl_attrib_atom_prioritized, atom)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit happened on atom
- * @atom:       atom identifier
- * @edit_addr:  address edited by jit
- * @new_addr:   address placed into the edited location
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(atom, edit_addr, new_addr) \
-	__TRACE_IF_ENABLED_JD(tl_attrib_atom_jit, atom, edit_addr, new_addr)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
- * @as:       assigned address space
- * @transtab: configuration of the TRANSTAB register
- * @memattr:  configuration of the MEMATTR register
- * @transcfg: configuration of the TRANSCFG register (or zero if not present)
- *
- * Function emits a timeline message containing address space attributes.
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(as, transtab, memattr, transcfg) \
-	__TRACE_IF_ENABLED(tl_attrib_as_config, as, transtab, memattr, transcfg)
-
-/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX
- * @atom:       atom identifier
- */
-#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(atom) \
-	__TRACE_IF_ENABLED(tl_event_atom_softstop_ex, atom)
-
-/**
- * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP
- * @lpu:        name of the LPU object
- */
-#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(lpu) \
-	__TRACE_IF_ENABLED(tl_event_lpu_softstop, lpu)
-
-/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE
- * @atom:       atom identifier
- */
-#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(atom) \
-	__TRACE_IF_ENABLED(tl_event_atom_softstop_issue, atom)
-
-/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START
- * @atom:       atom identifier
- */
-#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(atom) \
-	__TRACE_IF_ENABLED(tl_event_atom_softjob_start, atom)
-
-/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END
- * @atom:       atom identifier
- */
-#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(atom) \
-	__TRACE_IF_ENABLED(tl_event_atom_softjob_end, atom)
-
-/**
- * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - The GPU is being soft reset
- * @gpu:        name of the GPU object
- *
- * This imperative tracepoint is specific to job dumping.
- * Function emits a timeline message indicating GPU soft reset.
- */
-#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(gpu) \
-	__TRACE_IF_ENABLED(jd_gpu_soft_reset, gpu)
-
-
-/**
- * KBASE_TLSTREAM_AUX_PM_STATE - timeline message: power management state
- * @core_type: core type (shader, tiler, l2 cache, l3 cache)
- * @state:     64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
- */
-#define KBASE_TLSTREAM_AUX_PM_STATE(core_type, state) \
-	__TRACE_IF_ENABLED(aux_pm_state, core_type, state)
-
-/**
- * KBASE_TLSTREAM_AUX_PAGEFAULT - timeline message: MMU page fault event
- *                                resulting in new pages being mapped
- * @ctx_nr:            kernel context number
- * @page_count_change: number of pages to be added
- */
-#define KBASE_TLSTREAM_AUX_PAGEFAULT(ctx_nr, page_count_change) \
-	__TRACE_IF_ENABLED(aux_pagefault, ctx_nr, page_count_change)
-
-/**
- * KBASE_TLSTREAM_AUX_PAGESALLOC - timeline message: total number of allocated
- *                                 pages is changed
- * @ctx_nr:     kernel context number
- * @page_count: number of pages used by the context
- */
-#define KBASE_TLSTREAM_AUX_PAGESALLOC(ctx_nr, page_count) \
-	__TRACE_IF_ENABLED(aux_pagesalloc, ctx_nr, page_count)
-
-/**
- * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - timeline message: new target DVFS
- *                                     frequency
- * @target_freq: new target frequency
- */
-#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(target_freq) \
-	__TRACE_IF_ENABLED(aux_devfreq_target, target_freq)
-
-/**
- * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - The GPU has started transitioning
- *                                            to protected mode
- * @gpu: name of the GPU object
- *
- * Function emits a timeline message indicating the GPU is starting to
- * transition to protected mode.
- */
-#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(gpu) \
-	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_start, gpu)
-
-/**
- * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - The GPU has finished transitioning
- *                                          to protected mode
- * @gpu: name of the GPU object
- *
- * Function emits a timeline message indicating the GPU has finished
- * transitioning to protected mode.
- */
-#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(gpu) \
-	__TRACE_IF_ENABLED_LATENCY(aux_protected_enter_end, gpu)
-
-/**
- * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - The GPU has started transitioning
- *                                            to non-protected mode
- * @gpu: name of the GPU object
- *
- * Function emits a timeline message indicating the GPU is starting to
- * transition to non-protected mode.
- */
-#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(gpu) \
-	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_start, gpu)
-
-/**
- * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - The GPU has finished transitioning
- *                                          to non-protected mode
- * @gpu: name of the GPU object
- *
- * Function emits a timeline message indicating the GPU has finished
- * transitioning to non-protected mode.
- */
-#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(gpu) \
-	__TRACE_IF_ENABLED_LATENCY(aux_protected_leave_end, gpu)
+void kbase_tlstream_flush_stream(struct kbase_tlstream *stream);
 
 #endif /* _KBASE_TLSTREAM_H */
 
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
index d7364d5..77fb818 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -172,8 +172,6 @@
 	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
 	/* gpu_addr==value to write into JS_HEAD */
 	KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
-	/* kctx is the one being evicted, info_val == kctx to put in  */
-	KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
 	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
 	/* info_val == lower 32 bits of affinity */
 	KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tracepoints.c b/drivers/gpu/arm/midgard/mali_kbase_tracepoints.c
new file mode 100644
index 0000000..2c55127
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tracepoints.c
@@ -0,0 +1,2836 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * DO NOT EDIT.
+ */
+
+#include "mali_kbase_tracepoints.h"
+#include "mali_kbase_tlstream.h"
+#include "mali_kbase_tl_serialize.h"
+
+/* clang-format off */
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id_obj {
+	KBASE_TL_NEW_CTX,
+	KBASE_TL_NEW_GPU,
+	KBASE_TL_NEW_LPU,
+	KBASE_TL_NEW_ATOM,
+	KBASE_TL_NEW_AS,
+	KBASE_TL_DEL_CTX,
+	KBASE_TL_DEL_ATOM,
+	KBASE_TL_LIFELINK_LPU_GPU,
+	KBASE_TL_LIFELINK_AS_GPU,
+	KBASE_TL_RET_CTX_LPU,
+	KBASE_TL_RET_ATOM_CTX,
+	KBASE_TL_RET_ATOM_LPU,
+	KBASE_TL_NRET_CTX_LPU,
+	KBASE_TL_NRET_ATOM_CTX,
+	KBASE_TL_NRET_ATOM_LPU,
+	KBASE_TL_RET_AS_CTX,
+	KBASE_TL_NRET_AS_CTX,
+	KBASE_TL_RET_ATOM_AS,
+	KBASE_TL_NRET_ATOM_AS,
+	KBASE_TL_ATTRIB_ATOM_CONFIG,
+	KBASE_TL_ATTRIB_ATOM_PRIORITY,
+	KBASE_TL_ATTRIB_ATOM_STATE,
+	KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
+	KBASE_TL_ATTRIB_ATOM_JIT,
+	KBASE_TL_JIT_USEDPAGES,
+	KBASE_TL_ATTRIB_ATOM_JITALLOCINFO,
+	KBASE_TL_ATTRIB_ATOM_JITFREEINFO,
+	KBASE_TL_ATTRIB_AS_CONFIG,
+	KBASE_TL_EVENT_LPU_SOFTSTOP,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_EX,
+	KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_START,
+	KBASE_TL_EVENT_ATOM_SOFTJOB_END,
+	KBASE_JD_GPU_SOFT_RESET,
+	KBASE_TL_NEW_KCPUQUEUE,
+	KBASE_TL_RET_KCPUQUEUE_CTX,
+	KBASE_TL_DEL_KCPUQUEUE,
+	KBASE_TL_NRET_KCPUQUEUE_CTX,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
+	KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START,
+	KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END,
+	KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END,
+	KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END,
+	KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER,
+	KBASE_OBJ_MSG_COUNT,
+};
+
+/* Message ids of trace events that are recorded in the auxiliary stream. */
+enum tl_msg_id_aux {
+	KBASE_AUX_PM_STATE,
+	KBASE_AUX_PAGEFAULT,
+	KBASE_AUX_PAGESALLOC,
+	KBASE_AUX_DEVFREQ_TARGET,
+	KBASE_AUX_PROTECTED_ENTER_START,
+	KBASE_AUX_PROTECTED_ENTER_END,
+	KBASE_AUX_PROTECTED_LEAVE_START,
+	KBASE_AUX_PROTECTED_LEAVE_END,
+	KBASE_AUX_JIT_STATS,
+	KBASE_AUX_EVENT_JOB_SLOT,
+	KBASE_AUX_MSG_COUNT,
+};
+
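+/*
+ * Note: each value in the two enums above doubles as the on-stream message
+ * id. The writer functions later in this file serialize the enum value as
+ * the first field of every packet, and the TP_DESC lists below attach a
+ * human-readable description, an argument format string and argument names
+ * to each id, so the enums and the descriptor lists are presumably expected
+ * to stay in step.
+ */
+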
+#define OBJ_TL_LIST \
+	TP_DESC(KBASE_TL_NEW_CTX, \
+		"object ctx is created", \
+		"@pII", \
+		"ctx,ctx_nr,tgid") \
+	TP_DESC(KBASE_TL_NEW_GPU, \
+		"object gpu is created", \
+		"@pII", \
+		"gpu,gpu_id,core_count") \
+	TP_DESC(KBASE_TL_NEW_LPU, \
+		"object lpu is created", \
+		"@pII", \
+		"lpu,lpu_nr,lpu_fn") \
+	TP_DESC(KBASE_TL_NEW_ATOM, \
+		"object atom is created", \
+		"@pI", \
+		"atom,atom_nr") \
+	TP_DESC(KBASE_TL_NEW_AS, \
+		"address space object is created", \
+		"@pI", \
+		"address_space,as_nr") \
+	TP_DESC(KBASE_TL_DEL_CTX, \
+		"context is destroyed", \
+		"@p", \
+		"ctx") \
+	TP_DESC(KBASE_TL_DEL_ATOM, \
+		"atom is destroyed", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_LIFELINK_LPU_GPU, \
+		"lpu is deleted with gpu", \
+		"@pp", \
+		"lpu,gpu") \
+	TP_DESC(KBASE_TL_LIFELINK_AS_GPU, \
+		"address space is deleted with gpu", \
+		"@pp", \
+		"address_space,gpu") \
+	TP_DESC(KBASE_TL_RET_CTX_LPU, \
+		"context is retained by lpu", \
+		"@pp", \
+		"ctx,lpu") \
+	TP_DESC(KBASE_TL_RET_ATOM_CTX, \
+		"atom is retained by context", \
+		"@pp", \
+		"atom,ctx") \
+	TP_DESC(KBASE_TL_RET_ATOM_LPU, \
+		"atom is retained by lpu", \
+		"@pps", \
+		"atom,lpu,attrib_match_list") \
+	TP_DESC(KBASE_TL_NRET_CTX_LPU, \
+		"context is released by lpu", \
+		"@pp", \
+		"ctx,lpu") \
+	TP_DESC(KBASE_TL_NRET_ATOM_CTX, \
+		"atom is released by context", \
+		"@pp", \
+		"atom,ctx") \
+	TP_DESC(KBASE_TL_NRET_ATOM_LPU, \
+		"atom is released by lpu", \
+		"@pp", \
+		"atom,lpu") \
+	TP_DESC(KBASE_TL_RET_AS_CTX, \
+		"address space is retained by context", \
+		"@pp", \
+		"address_space,ctx") \
+	TP_DESC(KBASE_TL_NRET_AS_CTX, \
+		"address space is released by context", \
+		"@pp", \
+		"address_space,ctx") \
+	TP_DESC(KBASE_TL_RET_ATOM_AS, \
+		"atom is retained by address space", \
+		"@pp", \
+		"atom,address_space") \
+	TP_DESC(KBASE_TL_NRET_ATOM_AS, \
+		"atom is released by address space", \
+		"@pp", \
+		"atom,address_space") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_CONFIG, \
+		"atom job slot attributes", \
+		"@pLLI", \
+		"atom,descriptor,affinity,config") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \
+		"atom priority", \
+		"@pI", \
+		"atom,prio") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \
+		"atom state", \
+		"@pI", \
+		"atom,state") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \
+		"atom caused priority change", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \
+		"jit done for atom", \
+		"@pLLILILLL", \
+		"atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \
+	TP_DESC(KBASE_TL_JIT_USEDPAGES, \
+		"used pages for jit", \
+		"@LI", \
+		"used_pages,j_id") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_JITALLOCINFO, \
+		"Information about JIT allocations", \
+		"@pLLLIIIII", \
+		"atom,va_pgs,com_pgs,extent,j_id,bin_id,max_allocs,jit_flags,usg_id") \
+	TP_DESC(KBASE_TL_ATTRIB_ATOM_JITFREEINFO, \
+		"Information about JIT frees", \
+		"@pI", \
+		"atom,j_id") \
+	TP_DESC(KBASE_TL_ATTRIB_AS_CONFIG, \
+		"address space attributes", \
+		"@pLLL", \
+		"address_space,transtab,memattr,transcfg") \
+	TP_DESC(KBASE_TL_EVENT_LPU_SOFTSTOP, \
+		"softstop event on given lpu", \
+		"@p", \
+		"lpu") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_EX, \
+		"atom softstopped", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE, \
+		"atom softstop issued", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_START, \
+		"atom soft job has started", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_TL_EVENT_ATOM_SOFTJOB_END, \
+		"atom soft job has completed", \
+		"@p", \
+		"atom") \
+	TP_DESC(KBASE_JD_GPU_SOFT_RESET, \
+		"gpu soft reset", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_TL_NEW_KCPUQUEUE, \
+		"New KCPU Queue", \
+		"@ppI", \
+		"kcpu_queue,ctx,kcpuq_num_pending_cmds") \
+	TP_DESC(KBASE_TL_RET_KCPUQUEUE_CTX, \
+		"Context retains KCPU Queue", \
+		"@pp", \
+		"kcpu_queue,ctx") \
+	TP_DESC(KBASE_TL_DEL_KCPUQUEUE, \
+		"Delete KCPU Queue", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_NRET_KCPUQUEUE_CTX, \
+		"Context releases KCPU Queue", \
+		"@pp", \
+		"kcpu_queue,ctx") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL, \
+		"KCPU Queue enqueues Signal on Fence", \
+		"@pL", \
+		"kcpu_queue,fence") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT, \
+		"KCPU Queue enqueues Wait on Fence", \
+		"@pL", \
+		"kcpu_queue,fence") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+		"Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+		"Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
+		"@pLI", \
+		"kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
+		"End array of KCPU Queue enqueues Wait on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET, \
+		"Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET, \
+		"Array item of KCPU Queue enqueues Set on Cross Queue Sync Object", \
+		"@pL", \
+		"kcpu_queue,cqs_obj_gpu_addr") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET, \
+		"End array of KCPU Queue enqueues Set on Cross Queue Sync Object", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+		"Begin array of KCPU Queue enqueues Debug Copy", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+		"Array item of KCPU Queue enqueues Debug Copy", \
+		"@pL", \
+		"kcpu_queue,debugcopy_dst_size") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY, \
+		"End array of KCPU Queue enqueues Debug Copy", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
+		"KCPU Queue enqueues Map Import", \
+		"@pL", \
+		"kcpu_queue,map_import_buf_gpu_addr") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT, \
+		"KCPU Queue enqueues Unmap Import", \
+		"@pL", \
+		"kcpu_queue,map_import_buf_gpu_addr") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+		"Begin array of KCPU Queue enqueues JIT Alloc", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+		"Array item of KCPU Queue enqueues JIT Alloc", \
+		"@pLLLLIIIII", \
+		"kcpu_queue,jit_alloc_gpu_alloc_addr_dest,jit_alloc_va_pages,jit_alloc_commit_pages,jit_alloc_extent,jit_alloc_jit_id,jit_alloc_bin_id,jit_alloc_max_allocations,jit_alloc_flags,jit_alloc_usage_id") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
+		"End array of KCPU Queue enqueues JIT Alloc", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+		"Begin array of KCPU Queue enqueues JIT Free", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+		"Array item of KCPU Queue enqueues JIT Free", \
+		"@pI", \
+		"kcpu_queue,jit_alloc_jit_id") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE, \
+		"End array of KCPU Queue enqueues JIT Free", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START, \
+		"KCPU Queue starts a Signal on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \
+		"KCPU Queue ends a Signal on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \
+		"KCPU Queue starts a Wait on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \
+		"KCPU Queue ends a Wait on Fence", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \
+		"KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \
+		"KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START, \
+		"KCPU Queue starts a Set on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END, \
+		"KCPU Queue ends a Set on an array of Cross Queue Sync Objects", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START, \
+		"KCPU Queue starts an array of Debug Copies", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END, \
+		"KCPU Queue ends an array of Debug Copies", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
+		"KCPU Queue starts a Map Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \
+		"KCPU Queue ends a Map Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \
+		"KCPU Queue starts an Unmap Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \
+		"KCPU Queue ends an Unmap Import", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \
+		"KCPU Queue starts an array of JIT Allocs", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+		"Begin array of KCPU Queue ends an array of JIT Allocs", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+		"Array item of KCPU Queue ends an array of JIT Allocs", \
+		"@pLL", \
+		"kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
+		"End array of KCPU Queue ends an array of JIT Allocs", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START, \
+		"KCPU Queue starts an array of JIT Frees", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+		"Begin array of KCPU Queue ends an array of JIT Frees", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+		"Array item of KCPU Queue ends an array of JIT Frees", \
+		"@pL", \
+		"kcpu_queue,jit_free_pages_used") \
+	TP_DESC(KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
+		"End array of KCPU Queue ends an array of JIT Frees", \
+		"@p", \
+		"kcpu_queue") \
+	TP_DESC(KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER, \
+		"KCPU Queue executes an Error Barrier", \
+		"@p", \
+		"kcpu_queue") \
+
+#define MIPE_HEADER_BLOB_VAR_NAME    __obj_desc_header
+#define MIPE_HEADER_TP_LIST          OBJ_TL_LIST
+#define MIPE_HEADER_TP_LIST_COUNT    KBASE_OBJ_MSG_COUNT
+#define MIPE_HEADER_PKT_CLASS        TL_PACKET_CLASS_OBJ
+
+#include "mali_kbase_mipe_gen_header.h"
+
+const char   *obj_desc_header = (const char *) &__obj_desc_header;
+const size_t  obj_desc_header_size = sizeof(__obj_desc_header);
+
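+/*
+ * Note: mali_kbase_mipe_gen_header.h is included once per TP_DESC list and,
+ * driven by the MIPE_HEADER_* defines above, appears to expand the list into
+ * a packed descriptor blob (__obj_desc_header here, __aux_desc_header below)
+ * that is exported to the timeline reader via obj_desc_header[_size].
+ * Judging by the matching writer functions, the "@..." format strings encode
+ * one character per argument: 'p' for a pointer, 'I' for a u32, 'L' for a
+ * u64 and 's' for a string argument.
+ */
+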
+#define AUX_TL_LIST \
+	TP_DESC(KBASE_AUX_PM_STATE, \
+		"PM state", \
+		"@IL", \
+		"core_type,core_state_bitset") \
+	TP_DESC(KBASE_AUX_PAGEFAULT, \
+		"Page fault", \
+		"@IIL", \
+		"ctx_nr,as_nr,page_cnt_change") \
+	TP_DESC(KBASE_AUX_PAGESALLOC, \
+		"Total alloc pages change", \
+		"@IL", \
+		"ctx_nr,page_cnt") \
+	TP_DESC(KBASE_AUX_DEVFREQ_TARGET, \
+		"New device frequency target", \
+		"@L", \
+		"target_freq") \
+	TP_DESC(KBASE_AUX_PROTECTED_ENTER_START, \
+		"enter protected mode start", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_PROTECTED_ENTER_END, \
+		"enter protected mode end", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \
+		"leave protected mode start", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \
+		"leave protected mode end", \
+		"@p", \
+		"gpu") \
+	TP_DESC(KBASE_AUX_JIT_STATS, \
+		"per-bin JIT statistics", \
+		"@IIIIII", \
+		"ctx_nr,bid,max_allocs,allocs,va_pages,ph_pages") \
+	TP_DESC(KBASE_AUX_EVENT_JOB_SLOT, \
+		"event on a given job slot", \
+		"@pIII", \
+		"ctx,slot_nr,atom_nr,event") \
+
+#define MIPE_HEADER_BLOB_VAR_NAME    __aux_desc_header
+#define MIPE_HEADER_TP_LIST          AUX_TL_LIST
+#define MIPE_HEADER_TP_LIST_COUNT    KBASE_AUX_MSG_COUNT
+#define MIPE_HEADER_PKT_CLASS        TL_PACKET_CLASS_AUX
+
+#include "mali_kbase_mipe_gen_header.h"
+
+const char   *aux_desc_header = (const char *) &__aux_desc_header;
+const size_t  aux_desc_header_size = sizeof(__aux_desc_header);
+
+void __kbase_tlstream_tl_new_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 ctx_nr,
+	u32 tgid)
+{
+	const u32 msg_id = KBASE_TL_NEW_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(ctx_nr)
+		+ sizeof(tgid)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &tgid, sizeof(tgid));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
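+/*
+ * Note: every writer below follows the same pattern as
+ * __kbase_tlstream_tl_new_ctx() above: the serialized size is computed up
+ * front (message id, a 64-bit timestamp, then each argument), that many
+ * bytes are reserved in the stream with kbase_tlstream_msgbuf_acquire()
+ * (acq_flags presumably carrying the stream's lock/IRQ flags), the fields
+ * are packed with kbasep_serialize_bytes()/kbasep_serialize_timestamp(),
+ * and the message is published with kbase_tlstream_msgbuf_release().
+ */
+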
+void __kbase_tlstream_tl_new_gpu(
+	struct kbase_tlstream *stream,
+	const void *gpu,
+	u32 gpu_id,
+	u32 core_count)
+{
+	const u32 msg_id = KBASE_TL_NEW_GPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		+ sizeof(gpu_id)
+		+ sizeof(core_count)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu_id, sizeof(gpu_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &core_count, sizeof(core_count));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_lpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	u32 lpu_nr,
+	u32 lpu_fn)
+{
+	const u32 msg_id = KBASE_TL_NEW_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(lpu)
+		+ sizeof(lpu_nr)
+		+ sizeof(lpu_fn)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu_nr, sizeof(lpu_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu_fn, sizeof(lpu_fn));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_atom(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 atom_nr)
+{
+	const u32 msg_id = KBASE_TL_NEW_ATOM;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(atom_nr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom_nr, sizeof(atom_nr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_as(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u32 as_nr)
+{
+	const u32 msg_id = KBASE_TL_NEW_AS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(as_nr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &as_nr, sizeof(as_nr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_del_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_DEL_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_del_atom(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_DEL_ATOM;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_lifelink_lpu_gpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(lpu)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_lifelink_as_gpu(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_RET_CTX_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_RET_ATOM_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu,
+	const char *attrib_match_list)
+{
+	const u32 msg_id = KBASE_TL_RET_ATOM_LPU;
+	const size_t s0 = sizeof(u32) + sizeof(char)
+		+ strnlen(attrib_match_list, STRLEN_MAX);
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(lpu)
+		+ s0
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+	pos = kbasep_serialize_string(buffer,
+		pos, attrib_match_list, s0);
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
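+/*
+ * Note: __kbase_tlstream_tl_ret_atom_lpu() above is the variable-length
+ * variant of the pattern: the string argument contributes
+ * sizeof(u32) + sizeof(char) + strnlen(str, STRLEN_MAX) bytes to the message
+ * size and is written with kbasep_serialize_string(), which presumably emits
+ * a length-prefixed, truncated copy of the string.
+ */
+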
+void __kbase_tlstream_tl_nret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_NRET_CTX_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_NRET_ATOM_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_NRET_ATOM_LPU;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_RET_AS_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_NRET_AS_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space)
+{
+	const u32 msg_id = KBASE_TL_RET_ATOM_AS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(address_space)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space)
+{
+	const u32 msg_id = KBASE_TL_NRET_ATOM_AS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(address_space)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_config(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 descriptor,
+	u64 affinity,
+	u32 config)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(descriptor)
+		+ sizeof(affinity)
+		+ sizeof(config)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &descriptor, sizeof(descriptor));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &affinity, sizeof(affinity));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &config, sizeof(config));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 prio)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(prio)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &prio, sizeof(prio));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_state(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 state)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(state)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &state, sizeof(state));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_prioritized(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jit(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 edit_addr,
+	u64 new_addr,
+	u32 jit_flags,
+	u64 mem_flags,
+	u32 j_id,
+	u64 com_pgs,
+	u64 extent,
+	u64 va_pgs)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(edit_addr)
+		+ sizeof(new_addr)
+		+ sizeof(jit_flags)
+		+ sizeof(mem_flags)
+		+ sizeof(j_id)
+		+ sizeof(com_pgs)
+		+ sizeof(extent)
+		+ sizeof(va_pgs)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &edit_addr, sizeof(edit_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &new_addr, sizeof(new_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_flags, sizeof(jit_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &mem_flags, sizeof(mem_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &com_pgs, sizeof(com_pgs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &extent, sizeof(extent));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &va_pgs, sizeof(va_pgs));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_jit_usedpages(
+	struct kbase_tlstream *stream,
+	u64 used_pages,
+	u32 j_id)
+{
+	const u32 msg_id = KBASE_TL_JIT_USEDPAGES;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(used_pages)
+		+ sizeof(j_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &used_pages, sizeof(used_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 va_pgs,
+	u64 com_pgs,
+	u64 extent,
+	u32 j_id,
+	u32 bin_id,
+	u32 max_allocs,
+	u32 jit_flags,
+	u32 usg_id)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(va_pgs)
+		+ sizeof(com_pgs)
+		+ sizeof(extent)
+		+ sizeof(j_id)
+		+ sizeof(bin_id)
+		+ sizeof(max_allocs)
+		+ sizeof(jit_flags)
+		+ sizeof(usg_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &va_pgs, sizeof(va_pgs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &com_pgs, sizeof(com_pgs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &extent, sizeof(extent));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &bin_id, sizeof(bin_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &max_allocs, sizeof(max_allocs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_flags, sizeof(jit_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &usg_id, sizeof(usg_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 j_id)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		+ sizeof(j_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &j_id, sizeof(j_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_as_config(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u64 transtab,
+	u64 memattr,
+	u64 transcfg)
+{
+	const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(address_space)
+		+ sizeof(transtab)
+		+ sizeof(memattr)
+		+ sizeof(transcfg)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &address_space, sizeof(address_space));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &transtab, sizeof(transtab));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &memattr, sizeof(memattr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &transcfg, sizeof(transcfg));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_lpu_softstop(
+	struct kbase_tlstream *stream,
+	const void *lpu)
+{
+	const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(lpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &lpu, sizeof(lpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_ex(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softstop_issue(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_start(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_atom_softjob_end(
+	struct kbase_tlstream *stream,
+	const void *atom)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(atom)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom, sizeof(atom));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_jd_gpu_soft_reset(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_JD_GPU_SOFT_RESET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_pm_state(
+	struct kbase_tlstream *stream,
+	u32 core_type,
+	u64 core_state_bitset)
+{
+	const u32 msg_id = KBASE_AUX_PM_STATE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(core_type)
+		+ sizeof(core_state_bitset)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &core_type, sizeof(core_type));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &core_state_bitset, sizeof(core_state_bitset));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_pagefault(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 as_nr,
+	u64 page_cnt_change)
+{
+	const u32 msg_id = KBASE_AUX_PAGEFAULT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx_nr)
+		+ sizeof(as_nr)
+		+ sizeof(page_cnt_change)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &as_nr, sizeof(as_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &page_cnt_change, sizeof(page_cnt_change));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_pagesalloc(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u64 page_cnt)
+{
+	const u32 msg_id = KBASE_AUX_PAGESALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx_nr)
+		+ sizeof(page_cnt)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &page_cnt, sizeof(page_cnt));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_devfreq_target(
+	struct kbase_tlstream *stream,
+	u64 target_freq)
+{
+	const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(target_freq)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &target_freq, sizeof(target_freq));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_start(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_enter_end(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_start(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_end(
+	struct kbase_tlstream *stream,
+	const void *gpu)
+{
+	const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(gpu)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &gpu, sizeof(gpu));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_jit_stats(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 bid,
+	u32 max_allocs,
+	u32 allocs,
+	u32 va_pages,
+	u32 ph_pages)
+{
+	const u32 msg_id = KBASE_AUX_JIT_STATS;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx_nr)
+		+ sizeof(bid)
+		+ sizeof(max_allocs)
+		+ sizeof(allocs)
+		+ sizeof(va_pages)
+		+ sizeof(ph_pages)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx_nr, sizeof(ctx_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &bid, sizeof(bid));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &max_allocs, sizeof(max_allocs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &allocs, sizeof(allocs));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &va_pages, sizeof(va_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ph_pages, sizeof(ph_pages));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_event_job_slot(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 slot_nr,
+	u32 atom_nr,
+	u32 event)
+{
+	const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(ctx)
+		+ sizeof(slot_nr)
+		+ sizeof(atom_nr)
+		+ sizeof(event)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &slot_nr, sizeof(slot_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &atom_nr, sizeof(atom_nr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &event, sizeof(event));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_new_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx,
+	u32 kcpuq_num_pending_cmds)
+{
+	const u32 msg_id = KBASE_TL_NEW_KCPUQUEUE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(ctx)
+		+ sizeof(kcpuq_num_pending_cmds)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_ret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_RET_KCPUQUEUE_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_del_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_DEL_KCPUQUEUE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_nret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx)
+{
+	const u32 msg_id = KBASE_TL_NRET_KCPUQUEUE_CTX;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(ctx)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &ctx, sizeof(ctx));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(fence)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &fence, sizeof(fence));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(fence)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &fence, sizeof(fence));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr,
+	u32 cqs_obj_compare_value)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(cqs_obj_gpu_addr)
+		+ sizeof(cqs_obj_compare_value)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(cqs_obj_gpu_addr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 debugcopy_dst_size)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(debugcopy_dst_size)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &debugcopy_dst_size, sizeof(debugcopy_dst_size));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(map_import_buf_gpu_addr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(map_import_buf_gpu_addr)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &map_import_buf_gpu_addr, sizeof(map_import_buf_gpu_addr));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr_dest,
+	u64 jit_alloc_va_pages,
+	u64 jit_alloc_commit_pages,
+	u64 jit_alloc_extent,
+	u32 jit_alloc_jit_id,
+	u32 jit_alloc_bin_id,
+	u32 jit_alloc_max_allocations,
+	u32 jit_alloc_flags,
+	u32 jit_alloc_usage_id)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_alloc_gpu_alloc_addr_dest)
+		+ sizeof(jit_alloc_va_pages)
+		+ sizeof(jit_alloc_commit_pages)
+		+ sizeof(jit_alloc_extent)
+		+ sizeof(jit_alloc_jit_id)
+		+ sizeof(jit_alloc_bin_id)
+		+ sizeof(jit_alloc_max_allocations)
+		+ sizeof(jit_alloc_flags)
+		+ sizeof(jit_alloc_usage_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_gpu_alloc_addr_dest, sizeof(jit_alloc_gpu_alloc_addr_dest));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_va_pages, sizeof(jit_alloc_va_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_commit_pages, sizeof(jit_alloc_commit_pages));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_extent, sizeof(jit_alloc_extent));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_bin_id, sizeof(jit_alloc_bin_id));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_max_allocations, sizeof(jit_alloc_max_allocations));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_flags, sizeof(jit_alloc_flags));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_usage_id, sizeof(jit_alloc_usage_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u32 jit_alloc_jit_id)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_alloc_jit_id)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_jit_id, sizeof(jit_alloc_jit_id));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr,
+	u64 jit_alloc_mmu_flags)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_alloc_gpu_alloc_addr)
+		+ sizeof(jit_alloc_mmu_flags)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_alloc_mmu_flags, sizeof(jit_alloc_mmu_flags));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_free_pages_used)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		+ sizeof(jit_free_pages_used)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &jit_free_pages_used, sizeof(jit_free_pages_used));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue)
+{
+	const u32 msg_id = KBASE_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER;
+	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+		+ sizeof(kcpu_queue)
+		;
+	char *buffer;
+	unsigned long acq_flags;
+	size_t pos = 0;
+
+	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+	pos = kbasep_serialize_timestamp(buffer, pos);
+	pos = kbasep_serialize_bytes(buffer,
+		pos, &kcpu_queue, sizeof(kcpu_queue));
+
+	kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+/* clang-format on */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tracepoints.h b/drivers/gpu/arm/midgard/mali_kbase_tracepoints.h
new file mode 100644
index 0000000..7346493
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tracepoints.h
@@ -0,0 +1,2417 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * DO NOT EDIT.
+ */
+
+#if !defined(_KBASE_TRACEPOINTS_H)
+#define _KBASE_TRACEPOINTS_H
+
+/* Tracepoints are abstract callbacks notifying that some important
+ * software or hardware event has happened.
+ *
+ * In this particular implementation, a tracepoint results in a MIPE
+ * timeline event and, in some cases, also fires an ftrace event
+ * (a.k.a. a Gator event, see details below).
+ */
+
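+/* Illustrative sketch (not part of the generated API): driver code emits a
+ * tracepoint through one of the KBASE_TLSTREAM_* macros defined below. Each
+ * macro checks kbdev->timeline_is_enabled and, when tracing is enabled,
+ * calls the matching __kbase_tlstream_* writer to serialize a MIPE packet
+ * into the per-device timeline stream, e.g.:
+ *
+ *   KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, tgid);
+ *
+ * (kctx and tgid are hypothetical caller-side names, shown only to
+ * illustrate the calling convention.)
+ */
+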
+#include "mali_kbase.h"
+#include "mali_kbase_gator.h"
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+
+/* clang-format off */
+
+struct kbase_tlstream;
+
+extern const size_t __obj_stream_offset;
+extern const size_t __aux_stream_offset;
+
+/* This macro dispatches a kbase_tlstream pointer from
+ * a kbase_device instance. Only the AUX or OBJ
+ * streams can be dispatched. It is aware of the
+ * kbase_timeline binary representation and
+ * relies on the offset variables
+ * __obj_stream_offset and __aux_stream_offset.
+ */
+#define __TL_DISPATCH_STREAM(kbdev, stype) \
+	((struct kbase_tlstream *) \
+	 ((u8 *)kbdev->timeline + __ ## stype ## _stream_offset))
+
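+/* For example (illustrative expansion only),
+ *   __TL_DISPATCH_STREAM(kbdev, obj)
+ * evaluates to
+ *   (struct kbase_tlstream *)((u8 *)kbdev->timeline + __obj_stream_offset)
+ * i.e. the object stream embedded in the device's kbase_timeline instance.
+ */
+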
+struct tp_desc;
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+extern const char   *obj_desc_header;
+extern const size_t  obj_desc_header_size;
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+extern const char   *aux_desc_header;
+extern const size_t  aux_desc_header_size;
+
+#define TL_ATOM_STATE_IDLE 0
+#define TL_ATOM_STATE_READY 1
+#define TL_ATOM_STATE_DONE 2
+#define TL_ATOM_STATE_POSTED 3
+
+#define TL_JS_EVENT_START     GATOR_JOB_SLOT_START
+#define TL_JS_EVENT_STOP      GATOR_JOB_SLOT_STOP
+#define TL_JS_EVENT_SOFT_STOP GATOR_JOB_SLOT_SOFT_STOPPED
+
+#define TLSTREAM_ENABLED (1u << 31)
+
+void __kbase_tlstream_tl_new_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 ctx_nr,
+	u32 tgid);
+void __kbase_tlstream_tl_new_gpu(
+	struct kbase_tlstream *stream,
+	const void *gpu,
+	u32 gpu_id,
+	u32 core_count);
+void __kbase_tlstream_tl_new_lpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	u32 lpu_nr,
+	u32 lpu_fn);
+void __kbase_tlstream_tl_new_atom(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 atom_nr);
+void __kbase_tlstream_tl_new_as(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u32 as_nr);
+void __kbase_tlstream_tl_del_ctx(
+	struct kbase_tlstream *stream,
+	const void *ctx);
+void __kbase_tlstream_tl_del_atom(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_lifelink_lpu_gpu(
+	struct kbase_tlstream *stream,
+	const void *lpu,
+	const void *gpu);
+void __kbase_tlstream_tl_lifelink_as_gpu(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *gpu);
+void __kbase_tlstream_tl_ret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu);
+void __kbase_tlstream_tl_ret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx);
+void __kbase_tlstream_tl_ret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu,
+	const char *attrib_match_list);
+void __kbase_tlstream_tl_nret_ctx_lpu(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	const void *lpu);
+void __kbase_tlstream_tl_nret_atom_ctx(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *ctx);
+void __kbase_tlstream_tl_nret_atom_lpu(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *lpu);
+void __kbase_tlstream_tl_ret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx);
+void __kbase_tlstream_tl_nret_as_ctx(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	const void *ctx);
+void __kbase_tlstream_tl_ret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space);
+void __kbase_tlstream_tl_nret_atom_as(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	const void *address_space);
+void __kbase_tlstream_tl_attrib_atom_config(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 descriptor,
+	u64 affinity,
+	u32 config);
+void __kbase_tlstream_tl_attrib_atom_priority(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 prio);
+void __kbase_tlstream_tl_attrib_atom_state(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 state);
+void __kbase_tlstream_tl_attrib_atom_prioritized(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_attrib_atom_jit(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 edit_addr,
+	u64 new_addr,
+	u32 jit_flags,
+	u64 mem_flags,
+	u32 j_id,
+	u64 com_pgs,
+	u64 extent,
+	u64 va_pgs);
+void __kbase_tlstream_tl_jit_usedpages(
+	struct kbase_tlstream *stream,
+	u64 used_pages,
+	u32 j_id);
+void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u64 va_pgs,
+	u64 com_pgs,
+	u64 extent,
+	u32 j_id,
+	u32 bin_id,
+	u32 max_allocs,
+	u32 jit_flags,
+	u32 usg_id);
+void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(
+	struct kbase_tlstream *stream,
+	const void *atom,
+	u32 j_id);
+void __kbase_tlstream_tl_attrib_as_config(
+	struct kbase_tlstream *stream,
+	const void *address_space,
+	u64 transtab,
+	u64 memattr,
+	u64 transcfg);
+void __kbase_tlstream_tl_event_lpu_softstop(
+	struct kbase_tlstream *stream,
+	const void *lpu);
+void __kbase_tlstream_tl_event_atom_softstop_ex(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_event_atom_softstop_issue(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_start(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_tl_event_atom_softjob_end(
+	struct kbase_tlstream *stream,
+	const void *atom);
+void __kbase_tlstream_jd_gpu_soft_reset(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_pm_state(
+	struct kbase_tlstream *stream,
+	u32 core_type,
+	u64 core_state_bitset);
+void __kbase_tlstream_aux_pagefault(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 as_nr,
+	u64 page_cnt_change);
+void __kbase_tlstream_aux_pagesalloc(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u64 page_cnt);
+void __kbase_tlstream_aux_devfreq_target(
+	struct kbase_tlstream *stream,
+	u64 target_freq);
+void __kbase_tlstream_aux_protected_enter_start(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_protected_enter_end(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_protected_leave_start(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_protected_leave_end(
+	struct kbase_tlstream *stream,
+	const void *gpu);
+void __kbase_tlstream_aux_jit_stats(
+	struct kbase_tlstream *stream,
+	u32 ctx_nr,
+	u32 bid,
+	u32 max_allocs,
+	u32 allocs,
+	u32 va_pages,
+	u32 ph_pages);
+void __kbase_tlstream_aux_event_job_slot(
+	struct kbase_tlstream *stream,
+	const void *ctx,
+	u32 slot_nr,
+	u32 atom_nr,
+	u32 event);
+void __kbase_tlstream_tl_new_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx,
+	u32 kcpuq_num_pending_cmds);
+void __kbase_tlstream_tl_ret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx);
+void __kbase_tlstream_tl_del_kcpuqueue(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_nret_kcpuqueue_ctx(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	const void *ctx);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 fence);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr,
+	u32 cqs_obj_compare_value);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 cqs_obj_gpu_addr);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 debugcopy_dst_size);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr);
+void __kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 map_import_buf_gpu_addr);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr_dest,
+	u64 jit_alloc_va_pages,
+	u64 jit_alloc_commit_pages,
+	u64 jit_alloc_extent,
+	u32 jit_alloc_jit_id,
+	u32 jit_alloc_bin_id,
+	u32 jit_alloc_max_allocations,
+	u32 jit_alloc_flags,
+	u32 jit_alloc_usage_id);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u32 jit_alloc_jit_id);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_alloc_gpu_alloc_addr,
+	u64 jit_alloc_mmu_flags);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue,
+	u64 jit_free_pages_used);
+void __kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+void __kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier(
+	struct kbase_tlstream *stream,
+	const void *kcpu_queue);
+
+struct kbase_tlstream;
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_CTX -
+ *   object ctx is created
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @ctx_nr:	Kernel context number
+ * @tgid:	Thread Group Id
+ */
+#define KBASE_TLSTREAM_TL_NEW_CTX(	\
+	kbdev,	\
+	ctx,	\
+	ctx_nr,	\
+	tgid	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx, ctx_nr, tgid);	\
+	} while (0)
+
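+/* Usage sketch (illustrative; the call site and argument names below are
+ * hypothetical, not taken from this patch):
+ *
+ *   KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, (u32)current->tgid);
+ *
+ * When the timeline has not been enabled by userspace, the macro reduces
+ * to a single atomic_read() and an untaken branch.
+ */
+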
+/**
+ * KBASE_TLSTREAM_TL_NEW_GPU -
+ *   object gpu is created
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ * @gpu_id:	ID of the GPU
+ * @core_count:	Number of cores this GPU hosts
+ */
+#define KBASE_TLSTREAM_TL_NEW_GPU(	\
+	kbdev,	\
+	gpu,	\
+	gpu_id,	\
+	core_count	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_gpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				gpu, gpu_id, core_count);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_LPU -
+ *   object lpu is created
+ *
+ * @kbdev:	Kbase device
+ * @lpu:	Name of the Logical Processing Unit object
+ * @lpu_nr:	Sequential number assigned to the newly created LPU
+ * @lpu_fn:	Property describing functional abilities of this LPU
+ */
+#define KBASE_TLSTREAM_TL_NEW_LPU(	\
+	kbdev,	\
+	lpu,	\
+	lpu_nr,	\
+	lpu_fn	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				lpu, lpu_nr, lpu_fn);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_ATOM -
+ *   object atom is created
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @atom_nr:	Sequential number of an atom
+ */
+#define KBASE_TLSTREAM_TL_NEW_ATOM(	\
+	kbdev,	\
+	atom,	\
+	atom_nr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_atom(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, atom_nr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_AS -
+ *   address space object is created
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @as_nr:	Address space number
+ */
+#define KBASE_TLSTREAM_TL_NEW_AS(	\
+	kbdev,	\
+	address_space,	\
+	as_nr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_as(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, as_nr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_CTX -
+ *   context is destroyed
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_DEL_CTX(	\
+	kbdev,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_del_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_ATOM -
+ *   atom is destroyed
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_DEL_ATOM(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_del_atom(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU -
+ *   lpu is deleted with gpu
+ *
+ * @kbdev:	Kbase device
+ * @lpu:	Name of the Logical Processing Unit object
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU(	\
+	kbdev,	\
+	lpu,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_lifelink_lpu_gpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				lpu, gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU -
+ *   address space is deleted with gpu
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_TL_LIFELINK_AS_GPU(	\
+	kbdev,	\
+	address_space,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_lifelink_as_gpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU -
+ *   context is retained by lpu
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_RET_CTX_LPU(	\
+	kbdev,	\
+	ctx,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_ctx_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx, lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX -
+ *   atom is retained by context
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_CTX(	\
+	kbdev,	\
+	atom,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_atom_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU -
+ *   atom is retained by lpu
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @lpu:	Name of the Logical Processing Unit object
+ * @attrib_match_list:	List containing match operator attributes
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_LPU(	\
+	kbdev,	\
+	atom,	\
+	lpu,	\
+	attrib_match_list	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_atom_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, lpu, attrib_match_list);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU -
+ *   context is released by lpu
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_NRET_CTX_LPU(	\
+	kbdev,	\
+	ctx,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_ctx_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				ctx, lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX -
+ *   atom is released by context
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_CTX(	\
+	kbdev,	\
+	atom,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_atom_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU -
+ *   atom is released by lpu
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_LPU(	\
+	kbdev,	\
+	atom,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_atom_lpu(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_AS_CTX -
+ *   address space is retained by context
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_RET_AS_CTX(	\
+	kbdev,	\
+	address_space,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_as_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX -
+ *   address space is released by context
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_NRET_AS_CTX(	\
+	kbdev,	\
+	address_space,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_as_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS -
+ *   atom is retained by address space
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @address_space:	Name of the address space object
+ */
+#define KBASE_TLSTREAM_TL_RET_ATOM_AS(	\
+	kbdev,	\
+	atom,	\
+	address_space	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_atom_as(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, address_space);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS -
+ *   atom is released by address space
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @address_space:	Name of the address space object
+ */
+#define KBASE_TLSTREAM_TL_NRET_ATOM_AS(	\
+	kbdev,	\
+	atom,	\
+	address_space	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_atom_as(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, address_space);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG -
+ *   atom job slot attributes
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @descriptor:	Job descriptor address
+ * @affinity:	Job affinity
+ * @config:	Job config
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG(	\
+	kbdev,	\
+	atom,	\
+	descriptor,	\
+	affinity,	\
+	config	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_config(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, descriptor, affinity, config);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY -
+ *   atom priority
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @prio:	Atom priority
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(	\
+	kbdev,	\
+	atom,	\
+	prio	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_tl_attrib_atom_priority(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom, prio);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE -
+ *   atom state
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @state:	Atom state
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(	\
+	kbdev,	\
+	atom,	\
+	state	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_tl_attrib_atom_state(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom, state);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED -
+ *   atom caused priority change
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_tl_attrib_atom_prioritized(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT -
+ *   jit done for atom
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @edit_addr:	Address edited by jit
+ * @new_addr:	Address placed into the edited location
+ * @jit_flags:	Flags specifying the special requirements for
+ * the JIT allocation.
+ * @mem_flags:	Flags defining the properties of a memory region
+ * @j_id:	Unique ID provided by the caller; it is used
+ * to pair allocation and free requests.
+ * @com_pgs:	The minimum number of physical pages which
+ * should back the allocation.
+ * @extent:	Granularity of physical pages to grow the
+ * allocation by during a fault.
+ * @va_pgs:	The minimum number of virtual pages required
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(	\
+	kbdev,	\
+	atom,	\
+	edit_addr,	\
+	new_addr,	\
+	jit_flags,	\
+	mem_flags,	\
+	j_id,	\
+	com_pgs,	\
+	extent,	\
+	va_pgs	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_jit(	\
+				__TL_DISPATCH_STREAM(kbdev, obj),	\
+				atom, edit_addr, new_addr, jit_flags, mem_flags, j_id, com_pgs, extent, va_pgs);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_JIT_USEDPAGES -
+ *   used pages for jit
+ *
+ * @kbdev:	Kbase device
+ * @used_pages:	Number of pages used for jit
+ * @j_id:	Unique ID provided by the caller; it is used
+ * to pair allocation and free requests.
+ */
+#define KBASE_TLSTREAM_TL_JIT_USEDPAGES(	\
+	kbdev,	\
+	used_pages,	\
+	j_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_jit_usedpages(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				used_pages, j_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO -
+ *   Information about JIT allocations
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @va_pgs:	The minimum number of virtual pages required
+ * @com_pgs:	The minimum number of physical pages which
+ * should back the allocation.
+ * @extent:	Granularity of physical pages to grow the
+ * allocation by during a fault.
+ * @j_id:	Unique ID provided by the caller; it is used
+ * to pair allocation and free requests.
+ * @bin_id:	The JIT allocation bin, used in conjunction with
+ * max_allocations to limit the number of each
+ * type of JIT allocation.
+ * @max_allocs:	Maximum allocations allowed in this bin.
+ * @jit_flags:	Flags specifying the special requirements for
+ * the JIT allocation.
+ * @usg_id:	A hint about which allocation should be reused.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(	\
+	kbdev,	\
+	atom,	\
+	va_pgs,	\
+	com_pgs,	\
+	extent,	\
+	j_id,	\
+	bin_id,	\
+	max_allocs,	\
+	jit_flags,	\
+	usg_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_jitallocinfo(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, va_pgs, com_pgs, extent, j_id, bin_id, max_allocs, jit_flags, usg_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO -
+ *   Information about JIT frees
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ * @j_id:	Unique ID provided by the caller; it is used
+ * to pair allocation and free requests.
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO(	\
+	kbdev,	\
+	atom,	\
+	j_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_atom_jitfreeinfo(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom, j_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG -
+ *   address space attributes
+ *
+ * @kbdev:	Kbase device
+ * @address_space:	Name of the address space object
+ * @transtab:	Configuration of the TRANSTAB register
+ * @memattr:	Configuration of the MEMATTR register
+ * @transcfg:	Configuration of the TRANSCFG register (or zero if not present)
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(	\
+	kbdev,	\
+	address_space,	\
+	transtab,	\
+	memattr,	\
+	transcfg	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_attrib_as_config(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				address_space, transtab, memattr, transcfg);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP -
+ *   softstop event on given lpu
+ *
+ * @kbdev:	Kbase device
+ * @lpu:	Name of the Logical Processing Unit object
+ */
+#define KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP(	\
+	kbdev,	\
+	lpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_lpu_softstop(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				lpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX -
+ *   atom softstopped
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softstop_ex(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE -
+ *   atom softstop issued
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softstop_issue(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START -
+ *   atom soft job has started
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softjob_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END -
+ *   atom soft job has completed
+ *
+ * @kbdev:	Kbase device
+ * @atom:	Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(	\
+	kbdev,	\
+	atom	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_atom_softjob_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				atom);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET -
+ *   gpu soft reset
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_JD_GPU_SOFT_RESET(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_jd_gpu_soft_reset(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PM_STATE -
+ *   PM state
+ *
+ * @kbdev:	Kbase device
+ * @core_type:	Core type (shader, tiler, l2 cache, l3 cache)
+ * @core_state_bitset:	64-bit bitmask reporting the power state of the cores
+ * (1-ON, 0-OFF)
+ */
+#define KBASE_TLSTREAM_AUX_PM_STATE(	\
+	kbdev,	\
+	core_type,	\
+	core_state_bitset	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pm_state(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				core_type, core_state_bitset);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGEFAULT -
+ *   Page fault
+ *
+ * @kbdev:	Kbase device
+ * @ctx_nr:	Kernel context number
+ * @as_nr:	Address space number
+ * @page_cnt_change:	Number of pages to be added
+ */
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(	\
+	kbdev,	\
+	ctx_nr,	\
+	as_nr,	\
+	page_cnt_change	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagefault(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, as_nr, page_cnt_change);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PAGESALLOC -
+ *   Total alloc pages change
+ *
+ * @kbdev:	Kbase device
+ * @ctx_nr:	Kernel context number
+ * @page_cnt:	Number of pages used by the context
+ */
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(	\
+	kbdev,	\
+	ctx_nr,	\
+	page_cnt	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagesalloc(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, page_cnt);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET -
+ *   New device frequency target
+ *
+ * @kbdev:	Kbase device
+ * @target_freq:	New target frequency
+ */
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(	\
+	kbdev,	\
+	target_freq	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_devfreq_target(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				target_freq);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START -
+ *   enter protected mode start
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_enter_start(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END -
+ *   enter protected mode end
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_enter_end(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START -
+ *   leave protected mode start
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_leave_start(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END -
+ *   leave protected mode end
+ *
+ * @kbdev:	Kbase device
+ * @gpu:	Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(	\
+	kbdev,	\
+	gpu	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS)	\
+			__kbase_tlstream_aux_protected_leave_end(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				gpu);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_JIT_STATS -
+ *   per-bin JIT statistics
+ *
+ * @kbdev:	Kbase device
+ * @ctx_nr:	Kernel context number
+ * @bid:	JIT bin id
+ * @max_allocs:	Maximum allocations allowed in this bin.
+ * @allocs:	Number of active allocations in this bin
+ * @va_pages:	Number of virtual pages allocated in this bin
+ * @ph_pages:	Number of physical pages allocated in this bin
+ */
+#define KBASE_TLSTREAM_AUX_JIT_STATS(	\
+	kbdev,	\
+	ctx_nr,	\
+	bid,	\
+	max_allocs,	\
+	allocs,	\
+	va_pages,	\
+	ph_pages	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_jit_stats(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT -
+ *   event on a given job slot
+ *
+ * @kbdev:	Kbase device
+ * @ctx:	Name of the context object
+ * @slot_nr:	Job slot number
+ * @atom_nr:	Sequential number of an atom
+ * @event:	Event type. One of TL_JS_EVENT values
+ */
+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(	\
+	kbdev,	\
+	ctx,	\
+	slot_nr,	\
+	atom_nr,	\
+	event	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_event_job_slot(	\
+				__TL_DISPATCH_STREAM(kbdev, aux), \
+				ctx, slot_nr, atom_nr, event);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NEW_KCPUQUEUE -
+ *   New KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @ctx:	Name of the context object
+ * @kcpuq_num_pending_cmds:	Number of commands already enqueued
+ * in the KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_NEW_KCPUQUEUE(	\
+	kbdev,	\
+	kcpu_queue,	\
+	ctx,	\
+	kcpuq_num_pending_cmds	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_new_kcpuqueue(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, ctx, kcpuq_num_pending_cmds);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_RET_KCPUQUEUE_CTX -
+ *   Context retains KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_RET_KCPUQUEUE_CTX(	\
+	kbdev,	\
+	kcpu_queue,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_ret_kcpuqueue_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_DEL_KCPUQUEUE -
+ *   Delete KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_DEL_KCPUQUEUE(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_del_kcpuqueue(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_NRET_KCPUQUEUE_CTX -
+ *   Context releases KCPU Queue
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @ctx:	Name of the context object
+ */
+#define KBASE_TLSTREAM_TL_NRET_KCPUQUEUE_CTX(	\
+	kbdev,	\
+	kcpu_queue,	\
+	ctx	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_nret_kcpuqueue_ctx(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, ctx);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL -
+ *   KCPU Queue enqueues Signal on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @fence:	Fence object handle
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(	\
+	kbdev,	\
+	kcpu_queue,	\
+	fence	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_signal(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, fence);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT -
+ *   KCPU Queue enqueues Wait on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @fence:	Fence object handle
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_FENCE_WAIT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	fence	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_fence_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, fence);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT -
+ *   Begin array of KCPU Queue enqueues Wait on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT -
+ *   Array item of KCPU Queue enqueues Wait on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @cqs_obj_gpu_addr:	CQS Object GPU ptr
+ * @cqs_obj_compare_value:	Semaphore value that should be exceeded
+ * for the WAIT to pass
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	cqs_obj_gpu_addr,	\
+	cqs_obj_compare_value	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT -
+ *   End array of KCPU Queue enqueues Wait on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_wait(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
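The BEGIN/ITEM/END triplets above bracket a variable-length list of operations on the timeline. A minimal sketch of an emitter, assuming a caller-defined record type (both the struct and the helper below are illustrative only, not part of the patch):

/* Hypothetical emitter: one BEGIN, one ITEM per operation, one END. */
struct example_cqs_wait_op {
	u64 gpu_addr;		/* CQS object GPU address */
	u32 compare_value;	/* value the WAIT must exceed */
};

static void example_trace_cqs_wait_list(struct kbase_device *kbdev,
					const void *queue,
					const struct example_cqs_wait_op *ops,
					unsigned int nr_ops)
{
	unsigned int i;

	KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_WAIT(kbdev, queue);
	for (i = 0; i < nr_ops; i++)
		KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_WAIT(
			kbdev, queue, ops[i].gpu_addr, ops[i].compare_value);
	KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_WAIT(kbdev, queue);
}
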
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET -
+ *   Begin array of KCPU Queue enqueues Set on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_CQS_SET(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_cqs_set(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET -
+ *   Array item of KCPU Queue enqueues Set on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @cqs_obj_gpu_addr:	CQS Object GPU ptr
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_CQS_SET(	\
+	kbdev,	\
+	kcpu_queue,	\
+	cqs_obj_gpu_addr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_cqs_set(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, cqs_obj_gpu_addr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET -
+ *   End array of KCPU Queue enqueues Set on Cross Queue Sync Object
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_CQS_SET(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_cqs_set(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY -
+ *   Begin array of KCPU Queue enqueues Debug Copy
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_DEBUGCOPY(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_debugcopy(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY -
+ *   Array item of KCPU Queue enqueues Debug Copy
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @debugcopy_dst_size:	Debug Copy destination size
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_DEBUGCOPY(	\
+	kbdev,	\
+	kcpu_queue,	\
+	debugcopy_dst_size	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_debugcopy(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, debugcopy_dst_size);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY -
+ *   End array of KCPU Queue enqueues Debug Copy
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_DEBUGCOPY(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_debugcopy(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT -
+ *   KCPU Queue enqueues Map Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @map_import_buf_gpu_addr:	Map import buffer GPU ptr
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_MAP_IMPORT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	map_import_buf_gpu_addr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_map_import(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, map_import_buf_gpu_addr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT -
+ *   KCPU Queue enqueues Unmap Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @map_import_buf_gpu_addr:	Map import buffer GPU ptr
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(	\
+	kbdev,	\
+	kcpu_queue,	\
+	map_import_buf_gpu_addr	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_enqueue_unmap_import(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, map_import_buf_gpu_addr);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
+ *   Begin array of KCPU Queue enqueues JIT Alloc
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_alloc(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
+ *   Array item of KCPU Queue enqueues JIT Alloc
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_alloc_gpu_alloc_addr_dest:	The GPU virtual address to write
+ * the JIT allocated GPU virtual address to
+ * @jit_alloc_va_pages:	The minimum number of virtual pages required
+ * @jit_alloc_commit_pages:	The minimum number of physical pages which
+ * should back the allocation
+ * @jit_alloc_extent:	Granularity of physical pages to grow the allocation
+ * by during a fault
+ * @jit_alloc_jit_id:	Unique ID provided by the caller; it is used
+ * to pair allocation and free requests. Zero is not a valid value
+ * @jit_alloc_bin_id:	The JIT allocation bin, used in conjunction with
+ * max_allocations to limit the number of each type of JIT allocation
+ * @jit_alloc_max_allocations:	The maximum number of allocations
+ * allowed within the bin specified by bin_id. Should be the same for all
+ * JIT allocations within the same bin.
+ * @jit_alloc_flags:	Flags specifying the special requirements for the
+ * JIT allocation
+ * @jit_alloc_usage_id:	A hint about which allocation should be
+ * reused. The kernel should attempt to use a previous allocation with the same
+ * usage_id
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_alloc_gpu_alloc_addr_dest,	\
+	jit_alloc_va_pages,	\
+	jit_alloc_commit_pages,	\
+	jit_alloc_extent,	\
+	jit_alloc_jit_id,	\
+	jit_alloc_bin_id,	\
+	jit_alloc_max_allocations,	\
+	jit_alloc_flags,	\
+	jit_alloc_usage_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_alloc(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_alloc_gpu_alloc_addr_dest, jit_alloc_va_pages, jit_alloc_commit_pages, jit_alloc_extent, jit_alloc_jit_id, jit_alloc_bin_id, jit_alloc_max_allocations, jit_alloc_flags, jit_alloc_usage_id);	\
+	} while (0)
+
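For the ten-argument item macro above, a sketch of how it might be driven from a caller-side allocation-request record (the struct shown is a stand-in for illustration, not the driver's real type):

/* Hypothetical JIT allocation request record used only for this sketch. */
struct example_jit_alloc_req {
	u64 gpu_alloc_addr_dest;
	u64 va_pages;
	u64 commit_pages;
	u64 extent;
	u8 id;
	u8 bin_id;
	u8 max_allocations;
	u8 flags;
	u16 usage_id;
};

static void example_trace_jit_alloc_item(struct kbase_device *kbdev,
					 const void *queue,
					 const struct example_jit_alloc_req *req)
{
	KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
		kbdev, queue, req->gpu_alloc_addr_dest, req->va_pages,
		req->commit_pages, req->extent, req->id, req->bin_id,
		req->max_allocations, req->flags, req->usage_id);
}
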
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
+ *   End array of KCPU Queue enqueues JIT Alloc
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_alloc(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE -
+ *   Begin array of KCPU Queue enqueues JIT Free
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_enqueue_jit_free(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE -
+ *   Array item of KCPU Queue enqueues JIT Free
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_alloc_jit_id:	Unique ID provided by the caller; it is used
+ * to pair allocation and free requests. Zero is not a valid value
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_alloc_jit_id	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_enqueue_jit_free(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_alloc_jit_id);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE -
+ *   End array of KCPU Queue enqueues JIT Free
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_enqueue_jit_free(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START -
+ *   KCPU Queue starts a Signal on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END -
+ *   KCPU Queue ends a Signal on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_signal_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START -
+ *   KCPU Queue starts a Wait on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END -
+ *   KCPU Queue ends a Wait on Fence
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_fence_wait_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
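The EXECUTE_*_START/END pairs are intended to bracket the execution of the corresponding command so its duration is visible in the trace. A hedged sketch for a fence wait, assuming a dma_fence-backed implementation (the helper name is hypothetical and <linux/dma-fence.h> would be required):

/* Hypothetical: time an interruptible fence wait on the timeline. */
static int example_kcpu_fence_wait(struct kbase_device *kbdev,
				   const void *queue,
				   struct dma_fence *fence)
{
	long err;

	KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev, queue);
	err = dma_fence_wait(fence, true);
	KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_FENCE_WAIT_END(kbdev, queue);

	return err < 0 ? (int)err : 0;
}
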
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START -
+ *   KCPU Queue starts a Wait on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END -
+ *   KCPU Queue ends a Wait on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_WAIT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_wait_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START -
+ *   KCPU Queue starts a Set on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END -
+ *   KCPU Queue ends a Set on an array of Cross Queue Sync Objects
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_CQS_SET_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_cqs_set_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START -
+ *   KCPU Queue starts an array of Debug Copies
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END -
+ *   KCPU Queue ends an array of Debug Copies
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_DEBUGCOPY_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_debugcopy_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START -
+ *   KCPU Queue starts a Map Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_map_import_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END -
+ *   KCPU Queue ends a Map Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_map_import_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START -
+ *   KCPU Queue starts an Unmap Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END -
+ *   KCPU Queue ends an Unmap Import
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_unmap_import_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START -
+ *   KCPU Queue starts an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_jit_alloc_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ *   Begin array of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_alloc_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ *   Array item of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_alloc_gpu_alloc_addr:	The JIT allocated GPU virtual address
+ * @jit_alloc_mmu_flags:	The MMU flags for the JIT allocation
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_alloc_gpu_alloc_addr,	\
+	jit_alloc_mmu_flags	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_alloc_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
+ *   End array of KCPU Queue ends an array of JIT Allocs
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_alloc_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START -
+ *   KCPU Queue starts an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_JIT_FREE_START(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_jit_free_start(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ *   Begin array of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_begin_kcpuqueue_execute_jit_free_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ *   Array item of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ * @jit_free_pages_used:	The actual number of pages used by the JIT
+ * allocation
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
+	kbdev,	\
+	kcpu_queue,	\
+	jit_free_pages_used	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_item_kcpuqueue_execute_jit_free_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue, jit_free_pages_used);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END -
+ *   End array of KCPU Queue ends an array of JIT Frees
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_array_end_kcpuqueue_execute_jit_free_end(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER -
+ *   KCPU Queue executes an Error Barrier
+ *
+ * @kbdev:	Kbase device
+ * @kcpu_queue:	KCPU queue
+ */
+#define KBASE_TLSTREAM_TL_EVENT_KCPUQUEUE_EXECUTE_ERRORBARRIER(	\
+	kbdev,	\
+	kcpu_queue	\
+	)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled); \
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_tl_event_kcpuqueue_execute_errorbarrier(	\
+				__TL_DISPATCH_STREAM(kbdev, obj), \
+				kcpu_queue);	\
+	} while (0)
+
+
+/* Gator tracepoints are hooked into the TLSTREAM interface.
+ * When the following tracepoints are called, the corresponding
+ * Gator tracepoint will be called as well.
+ */
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+/* `event` is one of TL_JS_EVENT values here.
+ * The values of TL_JS_EVENT are guaranteed to match
+ * the corresponding GATOR_JOB_SLOT values.
+ */
+#undef KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT
+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, \
+	context, slot_nr, atom_nr, event)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		kbase_trace_mali_job_slots_event(kbdev->id,	\
+			GATOR_MAKE_EVENT(event, slot_nr),	\
+			context, (u8) atom_nr);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_event_job_slot(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				context, slot_nr, atom_nr, event);	\
+	} while (0)
+
+#undef KBASE_TLSTREAM_AUX_PM_STATE
+#define KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, state)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		kbase_trace_mali_pm_status(kbdev->id,	\
+			core_type, state);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pm_state(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				core_type, state);	\
+	} while (0)
+
+#undef KBASE_TLSTREAM_AUX_PAGEFAULT
+#define KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, \
+	ctx_nr, as_nr, page_cnt_change)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		kbase_trace_mali_page_fault_insert_pages(kbdev->id,	\
+			as_nr,	\
+			page_cnt_change);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagefault(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				ctx_nr, as_nr, page_cnt_change);	\
+	} while (0)
+
+/* kbase_trace_mali_total_alloc_pages_change is handled differently here.
+ * We stream the total number of pages allocated for `kbdev` rather
+ * than `page_count`, which is per-context.
+ */
+#undef KBASE_TLSTREAM_AUX_PAGESALLOC
+#define KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, page_cnt)	\
+	do {	\
+		int enabled = atomic_read(&kbdev->timeline_is_enabled);	\
+		u32 global_pages_count = \
+			atomic_read(&kbdev->memdev.used_pages);	\
+			\
+		kbase_trace_mali_total_alloc_pages_change(kbdev->id,	\
+			global_pages_count);	\
+		if (enabled & TLSTREAM_ENABLED)	\
+			__kbase_tlstream_aux_pagesalloc(	\
+				__TL_DISPATCH_STREAM(kbdev, aux),	\
+				ctx_nr, page_cnt);	\
+	} while (0)
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+/* clang-format on */
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h
index f2e5a33..8d4f0443 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_utility.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -29,17 +29,6 @@
 #error "Don't include this file directly, use mali_kbase.h instead"
 #endif
 
-/** Test whether the given list entry is a member of the given list.
- *
- * @param base      The head of the list to be tested
- * @param entry     The list entry to be tested
- *
- * @return          true if entry is a member of base
- *                  false otherwise
- */
-bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
-
-
 static inline void kbase_timer_setup(struct timer_list *timer,
 				     void (*callback)(struct timer_list *timer))
 {
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
index 69d688a..377642d 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,222 +20,111 @@
  *
  */
 
+#include "mali_kbase_vinstr.h"
+#include "mali_kbase_hwcnt_virtualizer.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase_hwcnt_reader.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_ioctl.h"
+#include "mali_malisw.h"
+#include "mali_kbase_debug.h"
+
 #include <linux/anon_inodes.h>
-#include <linux/atomic.h>
+#include <linux/fcntl.h>
+#include <linux/fs.h>
 #include <linux/hrtimer.h>
-#include <linux/jiffies.h>
-#include <linux/kthread.h>
-#include <linux/list.h>
 #include <linux/mm.h>
+#include <linux/mutex.h>
 #include <linux/poll.h>
-#include <linux/preempt.h>
 #include <linux/slab.h>
-#include <linux/wait.h>
-
-#include <mali_kbase.h>
-#include <mali_kbase_hwaccess_instr.h>
-#include <mali_kbase_hwaccess_jm.h>
-#include <mali_kbase_hwcnt_reader.h>
-#include <mali_kbase_mem_linux.h>
-#include <mali_kbase_tlstream.h>
-#ifdef CONFIG_MALI_NO_MALI
-#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif
-
-/*****************************************************************************/
+#include <linux/workqueue.h>
 
 /* Hwcnt reader API version */
-#define HWCNT_READER_API        1
+#define HWCNT_READER_API 1
 
-/* The number of nanoseconds in a second. */
-#define NSECS_IN_SEC            1000000000ull /* ns */
+/* The minimum allowed interval between dumps (equivalent to 10 kHz) */
+#define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC)
 
-/* The time resolution of dumping service. */
-#define DUMPING_RESOLUTION      500000ull /* ns */
-
-/* The maximal supported number of dumping buffers. */
-#define MAX_BUFFER_COUNT        32
-
-/* Size and number of hw counters blocks. */
-#define NR_CNT_BLOCKS_PER_GROUP 8
-#define NR_CNT_PER_BLOCK        64
-#define NR_BYTES_PER_CNT        4
-#define NR_BYTES_PER_HDR        16
-#define PRFCNT_EN_MASK_OFFSET   0x8
-
-/*****************************************************************************/
-
-enum {
-	SHADER_HWCNT_BM,
-	TILER_HWCNT_BM,
-	MMU_L2_HWCNT_BM,
-	JM_HWCNT_BM
-};
-
-enum vinstr_state {
-	VINSTR_IDLE,
-	VINSTR_DUMPING,
-	VINSTR_SUSPENDING,
-	VINSTR_SUSPENDED,
-	VINSTR_RESUMING
-};
+/* The maximum allowed buffers per client */
+#define MAX_BUFFER_COUNT 32
 
 /**
- * struct kbase_vinstr_context - vinstr context per device
- * @lock:              protects the entire vinstr context, but the list of
- *                     vinstr clients can be updated outside the lock using
- *                     @state_lock.
- * @kbdev:             pointer to kbase device
- * @kctx:              pointer to kbase context
- * @vmap:              vinstr vmap for mapping hwcnt dump buffer
- * @gpu_va:            GPU hwcnt dump buffer address
- * @cpu_va:            the CPU side mapping of the hwcnt dump buffer
- * @dump_size:         size of the dump buffer in bytes
- * @bitmap:            current set of counters monitored, not always in sync
- *                     with hardware
- * @reprogram:         when true, reprogram hwcnt block with the new set of
- *                     counters
- * @state:             vinstr state
- * @state_lock:        protects information about vinstr state and list of
- *                     clients.
- * @suspend_waitq:     notification queue to trigger state re-validation
- * @suspend_cnt:       reference counter of vinstr's suspend state
- * @suspend_work:      worker to execute on entering suspended state
- * @resume_work:       worker to execute on leaving suspended state
- * @nclients:          number of attached clients, pending or idle
- * @nclients_suspended: number of attached but suspended clients
- * @waiting_clients:   head of list of clients being periodically sampled
- * @idle_clients:      head of list of clients being idle
- * @suspended_clients: head of list of clients being suspended
- * @thread:            periodic sampling thread
- * @waitq:             notification queue of sampling thread
- * @request_pending:   request for action for sampling thread
- * @clients_present:   when true, we have at least one client
- *                     Note: this variable is in sync. with nclients and is
- *                     present to preserve simplicity. Protected by state_lock.
- * @need_suspend:      when true, a suspend has been requested while a resume is
- *                     in progress. Resume worker should queue a suspend.
- * @need_resume:       when true, a resume has been requested while a suspend is
- *                     in progress. Suspend worker should queue a resume.
- * @forced_suspend:    when true, the suspend of vinstr needs to take place
- *                     regardless of the kernel/user space clients attached
- *                     to it. In particular, this flag is set when the suspend
- *                     of vinstr is requested on entering protected mode or at
- *                     the time of device suspend.
+ * struct kbase_vinstr_context - IOCTL interface for userspace hardware
+ *                               counters.
+ * @hvirt:         Hardware counter virtualizer used by vinstr.
+ * @metadata:      Hardware counter metadata provided by virtualizer.
+ * @lock:          Lock protecting all vinstr state.
+ * @suspend_count: Suspend reference count. If non-zero, timer and worker are
+ *                 prevented from being re-scheduled.
+ * @client_count:  Number of vinstr clients.
+ * @clients:       List of vinstr clients.
+ * @dump_timer:    Timer that enqueues dump_work to a workqueue.
+ * @dump_work:     Worker for performing periodic counter dumps.
  */
 struct kbase_vinstr_context {
-	struct mutex             lock;
-	struct kbase_device      *kbdev;
-	struct kbase_context     *kctx;
-
-	struct kbase_vmap_struct *vmap;
-	u64                      gpu_va;
-	void                     *cpu_va;
-	size_t                   dump_size;
-	u32                      bitmap[4];
-	bool                     reprogram;
-
-	enum vinstr_state        state;
-	struct spinlock          state_lock;
-	wait_queue_head_t        suspend_waitq;
-	unsigned int             suspend_cnt;
-	struct work_struct       suspend_work;
-	struct work_struct       resume_work;
-
-	u32                      nclients;
-	u32                      nclients_suspended;
-	struct list_head         waiting_clients;
-	struct list_head         idle_clients;
-	struct list_head         suspended_clients;
-
-	struct task_struct       *thread;
-	wait_queue_head_t        waitq;
-	atomic_t                 request_pending;
-
-	bool                     clients_present;
-
-	bool                     need_suspend;
-	bool                     need_resume;
-	bool                     forced_suspend;
+	struct kbase_hwcnt_virtualizer *hvirt;
+	const struct kbase_hwcnt_metadata *metadata;
+	struct mutex lock;
+	size_t suspend_count;
+	size_t client_count;
+	struct list_head clients;
+	struct hrtimer dump_timer;
+	struct work_struct dump_work;
 };
 
 /**
- * struct kbase_vinstr_client - a vinstr client attached to a vinstr context
- * @vinstr_ctx:    vinstr context client is attached to
- * @list:          node used to attach this client to list in vinstr context
- * @buffer_count:  number of buffers this client is using
- * @event_mask:    events this client reacts to
- * @dump_size:     size of one dump buffer in bytes
- * @bitmap:        bitmap request for JM, TILER, SHADER and MMU counters
- * @legacy_buffer: userspace hwcnt dump buffer (legacy interface)
- * @kernel_buffer: kernel hwcnt dump buffer (kernel client interface)
- * @accum_buffer:  temporary accumulation buffer for preserving counters
- * @dump_time:     next time this clients shall request hwcnt dump
- * @dump_interval: interval between periodic hwcnt dumps
- * @dump_buffers:  kernel hwcnt dump buffers allocated by this client
- * @dump_buffers_meta: metadata of dump buffers
- * @meta_idx:      index of metadata being accessed by userspace
- * @read_idx:      index of buffer read by userspace
- * @write_idx:     index of buffer being written by dumping service
- * @waitq:         client's notification queue
- * @pending:       when true, client has attached but hwcnt not yet updated
- * @suspended:     when true, client is suspended
+ * struct kbase_vinstr_client - A vinstr client attached to a vinstr context.
+ * @vctx:              Vinstr context client is attached to.
+ * @hvcli:             Hardware counter virtualizer client.
+ * @node:              Node used to attach this client to list in vinstr
+ *                     context.
+ * @dump_interval_ns:  Interval between periodic dumps. If 0, not a periodic
+ *                     client.
+ * @next_dump_time_ns: Time in ns when this client's next periodic dump must
+ *                     occur. If 0, not a periodic client.
+ * @enable_map:        Counters enable map.
+ * @dump_bufs:         Array of dump buffers allocated by this client.
+ * @dump_bufs_meta:    Metadata of dump buffers.
+ * @meta_idx:          Index of metadata being accessed by userspace.
+ * @read_idx:          Index of buffer read by userspace.
+ * @write_idx:         Index of buffer being written by dump worker.
+ * @waitq:             Client's notification queue.
  */
 struct kbase_vinstr_client {
-	struct kbase_vinstr_context        *vinstr_ctx;
-	struct list_head                   list;
-	unsigned int                       buffer_count;
-	u32                                event_mask;
-	size_t                             dump_size;
-	u32                                bitmap[4];
-	void __user                        *legacy_buffer;
-	void                               *kernel_buffer;
-	void                               *accum_buffer;
-	u64                                dump_time;
-	u32                                dump_interval;
-	char                               *dump_buffers;
-	struct kbase_hwcnt_reader_metadata *dump_buffers_meta;
-	atomic_t                           meta_idx;
-	atomic_t                           read_idx;
-	atomic_t                           write_idx;
-	wait_queue_head_t                  waitq;
-	bool                               pending;
-	bool                               suspended;
+	struct kbase_vinstr_context *vctx;
+	struct kbase_hwcnt_virtualizer_client *hvcli;
+	struct list_head node;
+	u64 next_dump_time_ns;
+	u32 dump_interval_ns;
+	struct kbase_hwcnt_enable_map enable_map;
+	struct kbase_hwcnt_dump_buffer_array dump_bufs;
+	struct kbase_hwcnt_reader_metadata *dump_bufs_meta;
+	atomic_t meta_idx;
+	atomic_t read_idx;
+	atomic_t write_idx;
+	wait_queue_head_t waitq;
 };
 
-/**
- * struct kbasep_vinstr_wake_up_timer - vinstr service thread wake up timer
- * @hrtimer:    high resolution timer
- * @vinstr_ctx: vinstr context
- */
-struct kbasep_vinstr_wake_up_timer {
-	struct hrtimer              hrtimer;
-	struct kbase_vinstr_context *vinstr_ctx;
-};
-
-/*****************************************************************************/
-
-static void kbase_vinstr_update_suspend(
-		struct kbase_vinstr_context *vinstr_ctx);
-
-static int kbasep_vinstr_service_task(void *data);
-
 static unsigned int kbasep_vinstr_hwcnt_reader_poll(
-		struct file *filp,
-		poll_table  *wait);
-static long kbasep_vinstr_hwcnt_reader_ioctl(
-		struct file   *filp,
-		unsigned int  cmd,
-		unsigned long arg);
-static int kbasep_vinstr_hwcnt_reader_mmap(
-		struct file           *filp,
-		struct vm_area_struct *vma);
-static int kbasep_vinstr_hwcnt_reader_release(
-		struct inode *inode,
-		struct file  *filp);
+	struct file *filp,
+	poll_table *wait);
 
-/* The timeline stream file operations structure. */
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+	struct file *filp,
+	unsigned int cmd,
+	unsigned long arg);
+
+static int kbasep_vinstr_hwcnt_reader_mmap(
+	struct file *filp,
+	struct vm_area_struct *vma);
+
+static int kbasep_vinstr_hwcnt_reader_release(
+	struct inode *inode,
+	struct file *filp);
+
+/* Vinstr client file operations */
 static const struct file_operations vinstr_client_fops = {
+	.owner = THIS_MODULE,
 	.poll           = kbasep_vinstr_hwcnt_reader_poll,
 	.unlocked_ioctl = kbasep_vinstr_hwcnt_reader_ioctl,
 	.compat_ioctl   = kbasep_vinstr_hwcnt_reader_ioctl,
@@ -243,1211 +132,546 @@
 	.release        = kbasep_vinstr_hwcnt_reader_release,
 };
 
-/*****************************************************************************/
-
-static int enable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_context *kctx = vinstr_ctx->kctx;
-	struct kbase_device *kbdev = kctx->kbdev;
-	struct kbase_ioctl_hwcnt_enable enable;
-	int err;
-
-	enable.dump_buffer = vinstr_ctx->gpu_va;
-	enable.jm_bm       = vinstr_ctx->bitmap[JM_HWCNT_BM];
-	enable.tiler_bm    = vinstr_ctx->bitmap[TILER_HWCNT_BM];
-	enable.shader_bm   = vinstr_ctx->bitmap[SHADER_HWCNT_BM];
-	enable.mmu_l2_bm   = vinstr_ctx->bitmap[MMU_L2_HWCNT_BM];
-
-	/* Mark the context as active so the GPU is kept turned on */
-	/* A suspend won't happen here, because we're in a syscall from a
-	 * userspace thread. */
-	kbase_pm_context_active(kbdev);
-
-	/* Schedule the context in */
-	kbasep_js_schedule_privileged_ctx(kbdev, kctx);
-	err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
-	if (err) {
-		/* Release the context. This had its own Power Manager Active
-		 * reference */
-		kbasep_js_release_privileged_ctx(kbdev, kctx);
-
-		/* Also release our Power Manager Active reference */
-		kbase_pm_context_idle(kbdev);
-	}
-
-	return err;
-}
-
-static void disable_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_context *kctx = vinstr_ctx->kctx;
-	struct kbase_device *kbdev = kctx->kbdev;
-	int err;
-
-	err = kbase_instr_hwcnt_disable_internal(kctx);
-	if (err) {
-		dev_warn(kbdev->dev, "Failed to disable HW counters (ctx:%p)",
-				kctx);
-		return;
-	}
-
-	/* Release the context. This had its own Power Manager Active reference. */
-	kbasep_js_release_privileged_ctx(kbdev, kctx);
-
-	/* Also release our Power Manager Active reference. */
-	kbase_pm_context_idle(kbdev);
-
-	dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", kctx);
-}
-
-static int reprogram_hwcnt(struct kbase_vinstr_context *vinstr_ctx)
-{
-	disable_hwcnt(vinstr_ctx);
-	return enable_hwcnt(vinstr_ctx);
-}
-
-static void hwcnt_bitmap_set(u32 dst[4], u32 src[4])
-{
-	dst[JM_HWCNT_BM]     = src[JM_HWCNT_BM];
-	dst[TILER_HWCNT_BM]  = src[TILER_HWCNT_BM];
-	dst[SHADER_HWCNT_BM] = src[SHADER_HWCNT_BM];
-	dst[MMU_L2_HWCNT_BM] = src[MMU_L2_HWCNT_BM];
-}
-
-static void hwcnt_bitmap_union(u32 dst[4], u32 src[4])
-{
-	dst[JM_HWCNT_BM]     |= src[JM_HWCNT_BM];
-	dst[TILER_HWCNT_BM]  |= src[TILER_HWCNT_BM];
-	dst[SHADER_HWCNT_BM] |= src[SHADER_HWCNT_BM];
-	dst[MMU_L2_HWCNT_BM] |= src[MMU_L2_HWCNT_BM];
-}
-
-size_t kbase_vinstr_dump_size(struct kbase_device *kbdev)
-{
-	size_t dump_size;
-
-#ifndef CONFIG_MALI_NO_MALI
-	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
-		u32 nr_cg;
-
-		nr_cg = kbdev->gpu_props.num_core_groups;
-		dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
-				NR_CNT_PER_BLOCK *
-				NR_BYTES_PER_CNT;
-	} else
-#endif /* CONFIG_MALI_NO_MALI */
-	{
-		/* assume v5 for now */
-#ifdef CONFIG_MALI_NO_MALI
-		u32 nr_l2 = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
-		u64 core_mask =
-			(1ULL << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
-#else
-		base_gpu_props *props = &kbdev->gpu_props.props;
-		u32 nr_l2 = props->l2_props.num_l2_slices;
-		u64 core_mask = props->coherency_info.group[0].core_mask;
-#endif
-		u32 nr_blocks = fls64(core_mask);
-
-		/* JM and tiler counter blocks are always present */
-		dump_size = (2 + nr_l2 + nr_blocks) *
-				NR_CNT_PER_BLOCK *
-				NR_BYTES_PER_CNT;
-	}
-	return dump_size;
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_dump_size);
-
-static size_t kbasep_vinstr_dump_size_ctx(
-		struct kbase_vinstr_context *vinstr_ctx)
-{
-	return kbase_vinstr_dump_size(vinstr_ctx->kctx->kbdev);
-}
-
-static int kbasep_vinstr_map_kernel_dump_buffer(
-		struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_va_region *reg;
-	struct kbase_context *kctx = vinstr_ctx->kctx;
-	u64 flags, nr_pages;
-
-	flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR |
-		BASE_MEM_PERMANENT_KERNEL_MAPPING | BASE_MEM_CACHED_CPU;
-	if (kctx->kbdev->mmu_mode->flags &
-			KBASE_MMU_MODE_HAS_NON_CACHEABLE)
-		flags |= BASE_MEM_UNCACHED_GPU;
-	vinstr_ctx->dump_size = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
-	nr_pages = PFN_UP(vinstr_ctx->dump_size);
-
-	reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
-			&vinstr_ctx->gpu_va);
-	if (!reg)
-		return -ENOMEM;
-
-	vinstr_ctx->cpu_va = kbase_phy_alloc_mapping_get(kctx,
-			vinstr_ctx->gpu_va, &vinstr_ctx->vmap);
-
-	if (!vinstr_ctx->cpu_va) {
-		kbase_mem_free(kctx, vinstr_ctx->gpu_va);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static void kbasep_vinstr_unmap_kernel_dump_buffer(
-		struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_context *kctx = vinstr_ctx->kctx;
-
-	kbase_phy_alloc_mapping_put(kctx, vinstr_ctx->vmap);
-	kbase_mem_free(kctx, vinstr_ctx->gpu_va);
-}
-
 /**
- * kbasep_vinstr_create_kctx - create kernel context for vinstr
- * @vinstr_ctx: vinstr context
- * Return: zero on success
- */
-static int kbasep_vinstr_create_kctx(struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_device *kbdev = vinstr_ctx->kbdev;
-	struct kbasep_kctx_list_element *element = NULL;
-	unsigned long flags;
-	bool enable_backend = false;
-	int err;
-
-	vinstr_ctx->kctx = kbase_create_context(vinstr_ctx->kbdev, true);
-	if (!vinstr_ctx->kctx)
-		return -ENOMEM;
-
-	/* Map the master kernel dump buffer.  The HW dumps the counters
-	 * into this memory region. */
-	err = kbasep_vinstr_map_kernel_dump_buffer(vinstr_ctx);
-	if (err)
-		goto failed_map;
-
-	/* Add kernel context to list of contexts associated with device. */
-	element = kzalloc(sizeof(*element), GFP_KERNEL);
-	if (element) {
-		element->kctx = vinstr_ctx->kctx;
-		mutex_lock(&kbdev->kctx_list_lock);
-		list_add(&element->link, &kbdev->kctx_list);
-
-		/* Inform timeline client about new context.
-		 * Do this while holding the lock to avoid tracepoint
-		 * being created in both body and summary stream. */
-		KBASE_TLSTREAM_TL_NEW_CTX(
-				vinstr_ctx->kctx,
-				vinstr_ctx->kctx->id,
-				(u32)(vinstr_ctx->kctx->tgid));
-
-		mutex_unlock(&kbdev->kctx_list_lock);
-	} else {
-		/* Don't treat this as a fail - just warn about it. */
-		dev_warn(kbdev->dev,
-				"couldn't add kctx to kctx_list\n");
-	}
-
-	/* Don't enable hardware counters if vinstr is suspended.
-	 * Note that vinstr resume code is run under vinstr context lock,
-	 * lower layer will be enabled as needed on resume. */
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	if (VINSTR_IDLE == vinstr_ctx->state)
-		enable_backend = true;
-	vinstr_ctx->clients_present = true;
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-	if (enable_backend)
-		err = enable_hwcnt(vinstr_ctx);
-	if (err)
-		goto failed_enable;
-
-	vinstr_ctx->thread = kthread_run(
-			kbasep_vinstr_service_task,
-			vinstr_ctx,
-			"mali_vinstr_service");
-	if (IS_ERR(vinstr_ctx->thread)) {
-		err = PTR_ERR(vinstr_ctx->thread);
-		goto failed_kthread;
-	}
-
-	return 0;
-
-failed_kthread:
-	disable_hwcnt(vinstr_ctx);
-failed_enable:
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	vinstr_ctx->clients_present = false;
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
-	if (element) {
-		mutex_lock(&kbdev->kctx_list_lock);
-		list_del(&element->link);
-		kfree(element);
-		mutex_unlock(&kbdev->kctx_list_lock);
-		KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
-	}
-failed_map:
-	kbase_destroy_context(vinstr_ctx->kctx);
-	vinstr_ctx->kctx = NULL;
-	return err;
-}
-
-/**
- * kbasep_vinstr_destroy_kctx - destroy vinstr's kernel context
- * @vinstr_ctx: vinstr context
- */
-static void kbasep_vinstr_destroy_kctx(struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_device             *kbdev = vinstr_ctx->kbdev;
-	struct kbasep_kctx_list_element *element;
-	struct kbasep_kctx_list_element *tmp;
-	bool                            found = false;
-	bool                            hwcnt_disabled = false;
-	unsigned long                   flags;
-
-	/* Release hw counters dumping resources. */
-	vinstr_ctx->thread = NULL;
-
-	/* Simplify state transitions by specifying that we have no clients. */
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	vinstr_ctx->clients_present = false;
-	if ((VINSTR_SUSPENDED == vinstr_ctx->state) || (VINSTR_RESUMING == vinstr_ctx->state))
-		hwcnt_disabled = true;
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	if (!hwcnt_disabled)
-		disable_hwcnt(vinstr_ctx);
-
-	kbasep_vinstr_unmap_kernel_dump_buffer(vinstr_ctx);
-
-	/* Remove kernel context from the device's contexts list. */
-	mutex_lock(&kbdev->kctx_list_lock);
-	list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
-		if (element->kctx == vinstr_ctx->kctx) {
-			list_del(&element->link);
-			kfree(element);
-			found = true;
-		}
-	}
-	mutex_unlock(&kbdev->kctx_list_lock);
-
-	if (!found)
-		dev_warn(kbdev->dev, "kctx not in kctx_list\n");
-
-	/* Destroy context. */
-	kbase_destroy_context(vinstr_ctx->kctx);
-
-	/* Inform timeline client about context destruction. */
-	KBASE_TLSTREAM_TL_DEL_CTX(vinstr_ctx->kctx);
-
-	vinstr_ctx->kctx = NULL;
-}
-
-/**
- * kbasep_vinstr_attach_client - Attach a client to the vinstr core
- * @vinstr_ctx:    vinstr context
- * @buffer_count:  requested number of dump buffers
- * @bitmap:        bitmaps describing which counters should be enabled
- * @argp:          pointer where notification descriptor shall be stored
- * @kernel_buffer: pointer to kernel side buffer
+ * kbasep_vinstr_timestamp_ns() - Get the current time in nanoseconds.
  *
- * Return: vinstr opaque client handle or NULL on failure
+ * Return: Current time in nanoseconds.
  */
-static struct kbase_vinstr_client *kbasep_vinstr_attach_client(
-		struct kbase_vinstr_context *vinstr_ctx, u32 buffer_count,
-		u32 bitmap[4], void *argp, void *kernel_buffer)
-{
-	struct task_struct         *thread = NULL;
-	struct kbase_vinstr_client *cli;
-	unsigned long flags;
-	bool clients_present = false;
-
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	if (buffer_count > MAX_BUFFER_COUNT
-	    || (buffer_count & (buffer_count - 1)))
-		return NULL;
-
-	cli = kzalloc(sizeof(*cli), GFP_KERNEL);
-	if (!cli)
-		return NULL;
-
-	cli->vinstr_ctx   = vinstr_ctx;
-	cli->buffer_count = buffer_count;
-	cli->event_mask   =
-		(1 << BASE_HWCNT_READER_EVENT_MANUAL) |
-		(1 << BASE_HWCNT_READER_EVENT_PERIODIC);
-	cli->pending      = true;
-
-	hwcnt_bitmap_set(cli->bitmap, bitmap);
-
-	mutex_lock(&vinstr_ctx->lock);
-
-	hwcnt_bitmap_union(vinstr_ctx->bitmap, cli->bitmap);
-	vinstr_ctx->reprogram = true;
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	/* If this is the first client, create the vinstr kbase
-	 * context. This context is permanently resident until the
-	 * last client exits. */
-	if (!clients_present) {
-		hwcnt_bitmap_set(vinstr_ctx->bitmap, cli->bitmap);
-		if (kbasep_vinstr_create_kctx(vinstr_ctx) < 0)
-			goto error;
-
-		vinstr_ctx->reprogram = false;
-		cli->pending = false;
-	}
-
-	/* The GPU resets the counter block every time there is a request
-	 * to dump it. We need a per client kernel buffer for accumulating
-	 * the counters. */
-	cli->dump_size    = kbasep_vinstr_dump_size_ctx(vinstr_ctx);
-	cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
-	if (!cli->accum_buffer)
-		goto error;
-
-	/* Prepare buffers. */
-	if (cli->buffer_count) {
-		int *fd = (int *)argp;
-		size_t tmp;
-
-		/* Allocate area for buffers metadata storage. */
-		tmp = sizeof(struct kbase_hwcnt_reader_metadata) *
-			cli->buffer_count;
-		cli->dump_buffers_meta = kmalloc(tmp, GFP_KERNEL);
-		if (!cli->dump_buffers_meta)
-			goto error;
-
-		/* Allocate required number of dumping buffers. */
-		cli->dump_buffers = (char *)__get_free_pages(
-				GFP_KERNEL | __GFP_ZERO,
-				get_order(cli->dump_size * cli->buffer_count));
-		if (!cli->dump_buffers)
-			goto error;
-
-		/* Create descriptor for user-kernel data exchange. */
-		*fd = anon_inode_getfd(
-				"[mali_vinstr_desc]",
-				&vinstr_client_fops,
-				cli,
-				O_RDONLY | O_CLOEXEC);
-		if (0 > *fd)
-			goto error;
-	} else if (kernel_buffer) {
-		cli->kernel_buffer = kernel_buffer;
-	} else {
-		cli->legacy_buffer = (void __user *)argp;
-	}
-
-	atomic_set(&cli->read_idx, 0);
-	atomic_set(&cli->meta_idx, 0);
-	atomic_set(&cli->write_idx, 0);
-	init_waitqueue_head(&cli->waitq);
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	vinstr_ctx->nclients++;
-	list_add(&cli->list, &vinstr_ctx->idle_clients);
-	kbase_vinstr_update_suspend(vinstr_ctx);
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	mutex_unlock(&vinstr_ctx->lock);
-
-	return cli;
-
-error:
-	kfree(cli->dump_buffers_meta);
-	if (cli->dump_buffers)
-		free_pages(
-				(unsigned long)cli->dump_buffers,
-				get_order(cli->dump_size * cli->buffer_count));
-	kfree(cli->accum_buffer);
-	if (!clients_present && vinstr_ctx->kctx) {
-		thread = vinstr_ctx->thread;
-		kbasep_vinstr_destroy_kctx(vinstr_ctx);
-	}
-	kfree(cli);
-
-	mutex_unlock(&vinstr_ctx->lock);
-
-	/* Thread must be stopped after lock is released. */
-	if (thread)
-		kthread_stop(thread);
-
-	return NULL;
-}
-
-void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli)
-{
-	struct kbase_vinstr_context *vinstr_ctx;
-	struct kbase_vinstr_client  *iter, *tmp;
-	struct task_struct          *thread = NULL;
-	u32 zerobitmap[4] = { 0 };
-	int cli_found = 0;
-	unsigned long flags;
-	bool clients_present;
-
-	KBASE_DEBUG_ASSERT(cli);
-	vinstr_ctx = cli->vinstr_ctx;
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	mutex_lock(&vinstr_ctx->lock);
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-
-	list_for_each_entry_safe(iter, tmp, &vinstr_ctx->idle_clients, list) {
-		if (iter == cli) {
-			cli_found = 1;
-			break;
-		}
-	}
-	if (!cli_found) {
-		list_for_each_entry_safe(
-				iter, tmp, &vinstr_ctx->waiting_clients, list) {
-			if (iter == cli) {
-				cli_found = 1;
-				break;
-			}
-		}
-	}
-	if (!cli_found) {
-		list_for_each_entry_safe(
-				iter, tmp, &vinstr_ctx->suspended_clients, list) {
-			if (iter == cli) {
-				cli_found = 1;
-				break;
-			}
-		}
-	}
-	KBASE_DEBUG_ASSERT(cli_found);
-
-	if (cli_found) {
-		vinstr_ctx->reprogram = true;
-		list_del(&iter->list);
-	}
-
-	if (!cli->suspended)
-		vinstr_ctx->nclients--;
-	else
-		vinstr_ctx->nclients_suspended--;
-
-	kbase_vinstr_update_suspend(vinstr_ctx);
-
-	clients_present = (vinstr_ctx->nclients || vinstr_ctx->nclients_suspended);
-
-	/* Rebuild context bitmap now that the client has detached */
-	hwcnt_bitmap_set(vinstr_ctx->bitmap, zerobitmap);
-	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list)
-		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
-	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list)
-		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
-	list_for_each_entry(iter, &vinstr_ctx->suspended_clients, list)
-		hwcnt_bitmap_union(vinstr_ctx->bitmap, iter->bitmap);
-
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	kfree(cli->dump_buffers_meta);
-	free_pages(
-			(unsigned long)cli->dump_buffers,
-			get_order(cli->dump_size * cli->buffer_count));
-	kfree(cli->accum_buffer);
-	kfree(cli);
-
-	if (!clients_present) {
-		thread = vinstr_ctx->thread;
-		kbasep_vinstr_destroy_kctx(vinstr_ctx);
-	}
-
-	mutex_unlock(&vinstr_ctx->lock);
-
-	/* Thread must be stopped after lock is released. */
-	if (thread)
-		kthread_stop(thread);
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_detach_client);
-
-/* Accumulate counters in the dump buffer */
-static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
-{
-	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
-	u32 *d = dst;
-	u32 *s = src;
-	size_t i, j;
-
-	for (i = 0; i < dump_size; i += block_size) {
-		/* skip over the header block */
-		d += NR_BYTES_PER_HDR / sizeof(u32);
-		s += NR_BYTES_PER_HDR / sizeof(u32);
-		for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
-			/* saturate result if addition would result in wraparound */
-			if (U32_MAX - *d < *s)
-				*d = U32_MAX;
-			else
-				*d += *s;
-			d++;
-			s++;
-		}
-	}
-}
-
-/* This is the Midgard v4 patch function.  It copies the headers for each
- * of the defined blocks from the master kernel buffer and then patches up
- * the performance counter enable mask for each of the blocks to exclude
- * counters that were not requested by the client. */
-static void patch_dump_buffer_hdr_v4(
-		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_vinstr_client *cli)
-{
-	u32 *mask;
-	u8 *dst = cli->accum_buffer;
-	u8 *src = vinstr_ctx->cpu_va;
-	u32 nr_cg = vinstr_ctx->kctx->kbdev->gpu_props.num_core_groups;
-	size_t i, group_size, group;
-	enum {
-		SC0_BASE    = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
-		SC1_BASE    = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
-		SC2_BASE    = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
-		SC3_BASE    = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
-		TILER_BASE  = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
-		MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
-		JM_BASE     = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
-	};
-
-	group_size = NR_CNT_BLOCKS_PER_GROUP *
-			NR_CNT_PER_BLOCK *
-			NR_BYTES_PER_CNT;
-	for (i = 0; i < nr_cg; i++) {
-		group = i * group_size;
-		/* copy shader core headers */
-		memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
-		       NR_BYTES_PER_HDR);
-		memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
-		       NR_BYTES_PER_HDR);
-		memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
-		      NR_BYTES_PER_HDR);
-		memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
-		      NR_BYTES_PER_HDR);
-
-		/* copy tiler header */
-		memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
-		      NR_BYTES_PER_HDR);
-
-		/* copy mmu header */
-		memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
-		      NR_BYTES_PER_HDR);
-
-		/* copy job manager header */
-		memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
-		      NR_BYTES_PER_HDR);
-
-		/* patch the shader core enable mask */
-		mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[SHADER_HWCNT_BM];
-		mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[SHADER_HWCNT_BM];
-		mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[SHADER_HWCNT_BM];
-		mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[SHADER_HWCNT_BM];
-
-		/* patch the tiler core enable mask */
-		mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[TILER_HWCNT_BM];
-
-		/* patch the mmu core enable mask */
-		mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
-
-		/* patch the job manager enable mask */
-		mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[JM_HWCNT_BM];
-	}
-}
-
-/* This is the Midgard v5 patch function.  It copies the headers for each
- * of the defined blocks from the master kernel buffer and then patches up
- * the performance counter enable mask for each of the blocks to exclude
- * counters that were not requested by the client. */
-static void patch_dump_buffer_hdr_v5(
-		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_vinstr_client *cli)
-{
-	struct kbase_device *kbdev = vinstr_ctx->kctx->kbdev;
-	u32 i, nr_l2;
-	u64 core_mask;
-	u32 *mask;
-	u8 *dst = cli->accum_buffer;
-	u8 *src = vinstr_ctx->cpu_va;
-	size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
-
-	/* copy and patch job manager header */
-	memcpy(dst, src, NR_BYTES_PER_HDR);
-	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
-	*mask &= cli->bitmap[JM_HWCNT_BM];
-	dst += block_size;
-	src += block_size;
-
-	/* copy and patch tiler header */
-	memcpy(dst, src, NR_BYTES_PER_HDR);
-	mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
-	*mask &= cli->bitmap[TILER_HWCNT_BM];
-	dst += block_size;
-	src += block_size;
-
-	/* copy and patch MMU/L2C headers */
-	nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
-	for (i = 0; i < nr_l2; i++) {
-		memcpy(dst, src, NR_BYTES_PER_HDR);
-		mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
-		*mask &= cli->bitmap[MMU_L2_HWCNT_BM];
-		dst += block_size;
-		src += block_size;
-	}
-
-	/* copy and patch shader core headers */
-	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
-	while (0ull != core_mask) {
-		memcpy(dst, src, NR_BYTES_PER_HDR);
-		if (0ull != (core_mask & 1ull)) {
-			/* if block is not reserved update header */
-			mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
-			*mask &= cli->bitmap[SHADER_HWCNT_BM];
-		}
-		dst += block_size;
-		src += block_size;
-
-		core_mask >>= 1;
-	}
-}
-
-/**
- * accum_clients - accumulate dumped hw counters for all known clients
- * @vinstr_ctx: vinstr context
- */
-static void accum_clients(struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_vinstr_client *iter;
-	int v4 = 0;
-
-#ifndef CONFIG_MALI_NO_MALI
-	v4 = kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4);
-#endif
-
-	list_for_each_entry(iter, &vinstr_ctx->idle_clients, list) {
-		/* Don't bother accumulating clients whose hwcnt requests
-		 * have not yet been honoured. */
-		if (iter->pending)
-			continue;
-		if (v4)
-			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
-		else
-			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
-		accum_dump_buffer(
-				iter->accum_buffer,
-				vinstr_ctx->cpu_va,
-				iter->dump_size);
-	}
-	list_for_each_entry(iter, &vinstr_ctx->waiting_clients, list) {
-		/* Don't bother accumulating clients whose hwcnt requests
-		 * have not yet been honoured. */
-		if (iter->pending)
-			continue;
-		if (v4)
-			patch_dump_buffer_hdr_v4(vinstr_ctx, iter);
-		else
-			patch_dump_buffer_hdr_v5(vinstr_ctx, iter);
-		accum_dump_buffer(
-				iter->accum_buffer,
-				vinstr_ctx->cpu_va,
-				iter->dump_size);
-	}
-}
-
-/*****************************************************************************/
-
-/**
- * kbasep_vinstr_get_timestamp - return timestamp
- *
- * Function returns timestamp value based on raw monotonic timer. Value will
- * wrap around zero in case of overflow.
- *
- * Return: timestamp value
- */
-static u64 kbasep_vinstr_get_timestamp(void)
+static u64 kbasep_vinstr_timestamp_ns(void)
 {
 	struct timespec ts;
 
 	getrawmonotonic(&ts);
-	return (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+	return (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
 }
 
 /**
- * kbasep_vinstr_add_dump_request - register client's dumping request
- * @cli:             requesting client
- * @waiting_clients: list of pending dumping requests
- */
-static void kbasep_vinstr_add_dump_request(
-		struct kbase_vinstr_client *cli,
-		struct list_head *waiting_clients)
-{
-	struct kbase_vinstr_client *tmp;
-
-	if (list_empty(waiting_clients)) {
-		list_add(&cli->list, waiting_clients);
-		return;
-	}
-	list_for_each_entry(tmp, waiting_clients, list) {
-		if (tmp->dump_time > cli->dump_time) {
-			list_add_tail(&cli->list, &tmp->list);
-			return;
-		}
-	}
-	list_add_tail(&cli->list, waiting_clients);
-}
-
-/**
- * kbasep_vinstr_collect_and_accumulate - collect hw counters via low level
- *                                        dump and accumulate them for known
- *                                        clients
- * @vinstr_ctx: vinstr context
- * @timestamp: pointer where collection timestamp will be recorded
+ * kbasep_vinstr_next_dump_time_ns() - Calculate the next periodic dump time.
+ * @cur_ts_ns: Current time in nanoseconds.
+ * @interval:  Interval between dumps in nanoseconds.
  *
- * Return: zero on success
+ * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump
+ *         time that occurs after cur_ts_ns.
  */
-static int kbasep_vinstr_collect_and_accumulate(
-		struct kbase_vinstr_context *vinstr_ctx, u64 *timestamp)
+static u64 kbasep_vinstr_next_dump_time_ns(u64 cur_ts_ns, u32 interval)
 {
-	unsigned long flags;
-	int rcode;
+	/* Non-periodic client */
+	if (interval == 0)
+		return 0;
 
-#ifdef CONFIG_MALI_NO_MALI
-	/* The dummy model needs the CPU mapping. */
-	gpu_model_set_dummy_prfcnt_base_cpu(vinstr_ctx->cpu_va);
-#endif
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	if (VINSTR_IDLE != vinstr_ctx->state) {
-		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-		return -EAGAIN;
-	} else {
-		vinstr_ctx->state = VINSTR_DUMPING;
-	}
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	/* Request HW counters dump.
-	 * Disable preemption to make dump timestamp more accurate. */
-	preempt_disable();
-	*timestamp = kbasep_vinstr_get_timestamp();
-	rcode = kbase_instr_hwcnt_request_dump(vinstr_ctx->kctx);
-	preempt_enable();
-
-	if (!rcode)
-		rcode = kbase_instr_hwcnt_wait_for_dump(vinstr_ctx->kctx);
-	WARN_ON(rcode);
-
-	if (!rcode) {
-		/* Invalidate the kernel buffer before reading from it.
-		 * As the vinstr_ctx->lock is already held by the caller, the
-		 * unmap of kernel buffer cannot take place simultaneously.
-		 */
-		lockdep_assert_held(&vinstr_ctx->lock);
-		kbase_sync_mem_regions(vinstr_ctx->kctx, vinstr_ctx->vmap,
-				KBASE_SYNC_TO_CPU);
-	}
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	switch (vinstr_ctx->state) {
-	case VINSTR_SUSPENDING:
-		schedule_work(&vinstr_ctx->suspend_work);
-		break;
-	case VINSTR_DUMPING:
-		vinstr_ctx->state = VINSTR_IDLE;
-		wake_up_all(&vinstr_ctx->suspend_waitq);
-		break;
-	default:
-		break;
-	}
-
-	/* Accumulate values of collected counters. */
-	if (!rcode)
-		accum_clients(vinstr_ctx);
-
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	return rcode;
+	/*
+	 * Return the next interval after the current time relative to t=0.
+	 * This means multiple clients with the same period will synchronise,
+	 * regardless of when they were started, allowing the worker to be
+	 * scheduled less frequently.
+	 */
+	do_div(cur_ts_ns, interval);
+	return (cur_ts_ns + 1) * interval;
 }
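+
+/*
+ * Worked example for kbasep_vinstr_next_dump_time_ns(), with illustrative
+ * values: for cur_ts_ns = 2,350,000 and interval = 1,000,000, do_div() leaves
+ * the quotient 2 in cur_ts_ns, so the function returns 3,000,000. A second
+ * client with the same interval but a different start time gets the same
+ * result, so both can be serviced by a single wakeup of the dump worker.
+ */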
 
 /**
- * kbasep_vinstr_fill_dump_buffer - copy accumulated counters to empty kernel
- *                                  buffer
- * @cli:       requesting client
- * @timestamp: timestamp when counters were collected
- * @event_id:  id of event that caused triggered counters collection
+ * kbasep_vinstr_client_dump() - Perform a dump for a client.
+ * @vcli:     Non-NULL pointer to a vinstr client.
+ * @event_id: Event type that triggered the dump.
  *
- * Return: zero on success
+ * Return: 0 on success, else error code.
  */
-static int kbasep_vinstr_fill_dump_buffer(
-		struct kbase_vinstr_client *cli, u64 timestamp,
-		enum base_hwcnt_reader_event event_id)
+static int kbasep_vinstr_client_dump(
+	struct kbase_vinstr_client *vcli,
+	enum base_hwcnt_reader_event event_id)
 {
-	unsigned int write_idx = atomic_read(&cli->write_idx);
-	unsigned int read_idx  = atomic_read(&cli->read_idx);
-
+	int errcode;
+	u64 ts_start_ns;
+	u64 ts_end_ns;
+	unsigned int write_idx;
+	unsigned int read_idx;
+	struct kbase_hwcnt_dump_buffer *dump_buf;
 	struct kbase_hwcnt_reader_metadata *meta;
-	void                               *buffer;
+
+	WARN_ON(!vcli);
+	lockdep_assert_held(&vcli->vctx->lock);
+
+	write_idx = atomic_read(&vcli->write_idx);
+	read_idx = atomic_read(&vcli->read_idx);
 
 	/* Check if there is a place to copy HWC block into. */
-	if (write_idx - read_idx == cli->buffer_count)
-		return -1;
-	write_idx %= cli->buffer_count;
+	if (write_idx - read_idx == vcli->dump_bufs.buf_cnt)
+		return -EBUSY;
+	write_idx %= vcli->dump_bufs.buf_cnt;
 
-	/* Fill in dump buffer and its metadata. */
-	buffer = &cli->dump_buffers[write_idx * cli->dump_size];
-	meta   = &cli->dump_buffers_meta[write_idx];
-	meta->timestamp  = timestamp;
-	meta->event_id   = event_id;
+	dump_buf = &vcli->dump_bufs.bufs[write_idx];
+	meta = &vcli->dump_bufs_meta[write_idx];
+
+	errcode = kbase_hwcnt_virtualizer_client_dump(
+		vcli->hvcli, &ts_start_ns, &ts_end_ns, dump_buf);
+	if (errcode)
+		return errcode;
+
+	/* Patch the dump buffer headers to hide the counters that other hwcnt
+	 * clients are using.
+	 */
+	kbase_hwcnt_gpu_patch_dump_headers(dump_buf, &vcli->enable_map);
+
+	/* Zero all non-enabled counters (current values are undefined) */
+	kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map);
+
+	meta->timestamp = ts_end_ns;
+	meta->event_id = event_id;
 	meta->buffer_idx = write_idx;
-	memcpy(buffer, cli->accum_buffer, cli->dump_size);
-	return 0;
-}
-
-/**
- * kbasep_vinstr_fill_dump_buffer_legacy - copy accumulated counters to buffer
- *                                         allocated in userspace
- * @cli: requesting client
- *
- * Return: zero on success
- *
- * This is part of legacy ioctl interface.
- */
-static int kbasep_vinstr_fill_dump_buffer_legacy(
-		struct kbase_vinstr_client *cli)
-{
-	void __user  *buffer = cli->legacy_buffer;
-	int          rcode;
-
-	/* Copy data to user buffer. */
-	rcode = copy_to_user(buffer, cli->accum_buffer, cli->dump_size);
-	if (rcode) {
-		pr_warn("error while copying buffer to user\n");
-		return -EFAULT;
-	}
-	return 0;
-}
-
-/**
- * kbasep_vinstr_fill_dump_buffer_kernel - copy accumulated counters to buffer
- *                                         allocated in kernel space
- * @cli: requesting client
- *
- * Return: zero on success
- *
- * This is part of the kernel client interface.
- */
-static int kbasep_vinstr_fill_dump_buffer_kernel(
-		struct kbase_vinstr_client *cli)
-{
-	memcpy(cli->kernel_buffer, cli->accum_buffer, cli->dump_size);
-
-	return 0;
-}
-
-/**
- * kbasep_vinstr_reprogram - reprogram hwcnt set collected by inst
- * @vinstr_ctx: vinstr context
- */
-static void kbasep_vinstr_reprogram(
-		struct kbase_vinstr_context *vinstr_ctx)
-{
-	unsigned long flags;
-	bool suspended = false;
-
-	/* Don't enable hardware counters if vinstr is suspended. */
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	if (VINSTR_IDLE != vinstr_ctx->state)
-		suspended = true;
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-	if (suspended)
-		return;
-
-	/* Change to suspended state is done while holding vinstr context
-	 * lock. Below code will then no re-enable the instrumentation. */
-
-	if (vinstr_ctx->reprogram) {
-		struct kbase_vinstr_client *iter;
-
-		if (!reprogram_hwcnt(vinstr_ctx)) {
-			vinstr_ctx->reprogram = false;
-			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-			list_for_each_entry(
-					iter,
-					&vinstr_ctx->idle_clients,
-					list)
-				iter->pending = false;
-			list_for_each_entry(
-					iter,
-					&vinstr_ctx->waiting_clients,
-					list)
-				iter->pending = false;
-			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-		}
-	}
-}
-
-/**
- * kbasep_vinstr_update_client - copy accumulated counters to user readable
- *                               buffer and notify the user
- * @cli:       requesting client
- * @timestamp: timestamp when counters were collected
- * @event_id:  id of event that caused triggered counters collection
- *
- * Return: zero on success
- */
-static int kbasep_vinstr_update_client(
-		struct kbase_vinstr_client *cli, u64 timestamp,
-		enum base_hwcnt_reader_event event_id)
-{
-	int rcode = 0;
-	unsigned long flags;
-
-	/* Copy collected counters to user readable buffer. */
-	if (cli->buffer_count)
-		rcode = kbasep_vinstr_fill_dump_buffer(
-				cli, timestamp, event_id);
-	else if (cli->kernel_buffer)
-		rcode = kbasep_vinstr_fill_dump_buffer_kernel(cli);
-	else
-		rcode = kbasep_vinstr_fill_dump_buffer_legacy(cli);
-
-	/* Prepare for next request. */
-	memset(cli->accum_buffer, 0, cli->dump_size);
-
-	spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
-	/* Check if client was put to suspend state while it was being updated */
-	if (cli->suspended)
-		rcode = -EINVAL;
-	spin_unlock_irqrestore(&cli->vinstr_ctx->state_lock, flags);
-
-	if (rcode)
-		goto exit;
 
 	/* Notify client. Make sure all changes to memory are visible. */
 	wmb();
-	atomic_inc(&cli->write_idx);
-	wake_up_interruptible(&cli->waitq);
-
-exit:
-	return rcode;
+	atomic_inc(&vcli->write_idx);
+	wake_up_interruptible(&vcli->waitq);
+	return 0;
 }
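+
+/*
+ * Note on the full-ring check in kbasep_vinstr_client_dump(), with
+ * illustrative values: read_idx and write_idx only ever increase and are
+ * reduced modulo buf_cnt just before indexing, so with buf_cnt = 4,
+ * read_idx = 3 and write_idx = 7 the difference equals buf_cnt and the dump
+ * is rejected with -EBUSY until userspace returns a buffer via the put-buffer
+ * ioctl.
+ */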
 
 /**
- * kbasep_vinstr_wake_up_callback - vinstr wake up timer wake up function
+ * kbasep_vinstr_client_clear() - Reset all the client's counters to zero.
+ * @vcli: Non-NULL pointer to a vinstr client.
  *
- * @hrtimer: high resolution timer
- *
- * Return: High resolution timer restart enum.
+ * Return: 0 on success, else error code.
  */
-static enum hrtimer_restart kbasep_vinstr_wake_up_callback(
-		struct hrtimer *hrtimer)
+static int kbasep_vinstr_client_clear(struct kbase_vinstr_client *vcli)
 {
-	struct kbasep_vinstr_wake_up_timer *timer =
-		container_of(
-			hrtimer,
-			struct kbasep_vinstr_wake_up_timer,
-			hrtimer);
+	u64 ts_start_ns;
+	u64 ts_end_ns;
 
-	KBASE_DEBUG_ASSERT(timer);
+	WARN_ON(!vcli);
+	lockdep_assert_held(&vcli->vctx->lock);
 
-	atomic_set(&timer->vinstr_ctx->request_pending, 1);
-	wake_up_all(&timer->vinstr_ctx->waitq);
+	/* A virtualizer dump with a NULL buffer will just clear the virtualizer
+	 * client's buffer.
+	 */
+	return kbase_hwcnt_virtualizer_client_dump(
+		vcli->hvcli, &ts_start_ns, &ts_end_ns, NULL);
+}
 
+/**
+ * kbasep_vinstr_reschedule_worker() - Update next dump times for all periodic
+ *                                     vinstr clients, then reschedule the dump
+ *                                     worker appropriately.
+ * @vctx: Non-NULL pointer to the vinstr context.
+ *
+ * If there are no periodic clients, the dump worker is not rescheduled.
+ * Otherwise, it is rescheduled for the earliest pending periodic client dump.
+ */
+static void kbasep_vinstr_reschedule_worker(struct kbase_vinstr_context *vctx)
+{
+	u64 cur_ts_ns;
+	u64 earliest_next_ns = U64_MAX;
+	struct kbase_vinstr_client *pos;
+
+	WARN_ON(!vctx);
+	lockdep_assert_held(&vctx->lock);
+
+	cur_ts_ns = kbasep_vinstr_timestamp_ns();
+
+	/*
+	 * Update each client's next dump time, and find the earliest next
+	 * dump time if any of the clients have a non-zero interval.
+	 */
+	list_for_each_entry(pos, &vctx->clients, node) {
+		const u64 cli_next_ns =
+			kbasep_vinstr_next_dump_time_ns(
+				cur_ts_ns, pos->dump_interval_ns);
+
+		/* Non-zero next dump time implies a periodic client */
+		if ((cli_next_ns != 0) && (cli_next_ns < earliest_next_ns))
+			earliest_next_ns = cli_next_ns;
+
+		pos->next_dump_time_ns = cli_next_ns;
+	}
+
+	/* Cancel the timer if it is already pending */
+	hrtimer_cancel(&vctx->dump_timer);
+
+	/* Start the timer if there are periodic clients and vinstr is not
+	 * suspended.
+	 */
+	if ((earliest_next_ns != U64_MAX) &&
+	    (vctx->suspend_count == 0) &&
+	    !WARN_ON(earliest_next_ns < cur_ts_ns))
+		hrtimer_start(
+			&vctx->dump_timer,
+			ns_to_ktime(earliest_next_ns - cur_ts_ns),
+			HRTIMER_MODE_REL);
+}
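+
+/*
+ * Example (illustrative): with one client dumping every 1 ms and another
+ * every 5 ms, each pass recomputes both next dump times from the current
+ * time and arms dump_timer for the earlier of the two. A context holding
+ * only non-periodic clients leaves the timer unarmed.
+ */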
+
+/**
+ * kbasep_vinstr_dump_worker() - Dump worker that dumps all periodic clients
+ *                               that are due, then reschedules itself.
+ * @work: Work structure.
+ */
+static void kbasep_vinstr_dump_worker(struct work_struct *work)
+{
+	struct kbase_vinstr_context *vctx =
+		container_of(work, struct kbase_vinstr_context, dump_work);
+	struct kbase_vinstr_client *pos;
+	u64 cur_time_ns;
+
+	mutex_lock(&vctx->lock);
+
+	cur_time_ns = kbasep_vinstr_timestamp_ns();
+
+	/* Dump all periodic clients whose next dump time is before the current
+	 * time.
+	 */
+	list_for_each_entry(pos, &vctx->clients, node) {
+		if ((pos->next_dump_time_ns != 0) &&
+			(pos->next_dump_time_ns < cur_time_ns))
+			kbasep_vinstr_client_dump(
+				pos, BASE_HWCNT_READER_EVENT_PERIODIC);
+	}
+
+	/* Update the next dump times of all periodic clients, then reschedule
+	 * this worker at the earliest next dump time.
+	 */
+	kbasep_vinstr_reschedule_worker(vctx);
+
+	mutex_unlock(&vctx->lock);
+}
+
+/**
+ * kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for
+ *                              execution as soon as possible.
+ * @timer: Timer structure.
+ */
+static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer)
+{
+	struct kbase_vinstr_context *vctx =
+		container_of(timer, struct kbase_vinstr_context, dump_timer);
+
+	/* We don't need to check vctx->suspend_count here, as the suspend
+	 * function will ensure that any worker enqueued here is immediately
+	 * cancelled, and the worker itself won't reschedule this timer if
+	 * suspend_count != 0.
+	 */
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+	queue_work(system_wq, &vctx->dump_work);
+#else
+	queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
 	return HRTIMER_NORESTART;
 }
 
 /**
- * kbasep_vinstr_service_task - HWC dumping service thread
- *
- * @data: Pointer to vinstr context structure.
- *
- * Return: 0 on success; -ENOMEM if timer allocation fails
+ * kbasep_vinstr_client_destroy() - Destroy a vinstr client.
+ * @vcli: vinstr client. Must not be attached to a vinstr context.
  */
-static int kbasep_vinstr_service_task(void *data)
+static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli)
 {
-	struct kbase_vinstr_context        *vinstr_ctx = data;
-	struct kbasep_vinstr_wake_up_timer *timer;
+	if (!vcli)
+		return;
 
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
+	kbase_hwcnt_virtualizer_client_destroy(vcli->hvcli);
+	kfree(vcli->dump_bufs_meta);
+	kbase_hwcnt_dump_buffer_array_free(&vcli->dump_bufs);
+	kbase_hwcnt_enable_map_free(&vcli->enable_map);
+	kfree(vcli);
+}
 
-	timer = kmalloc(sizeof(*timer), GFP_KERNEL);
+/**
+ * kbasep_vinstr_client_create() - Create a vinstr client. Does not attach to
+ *                                 the vinstr context.
+ * @vctx:     Non-NULL pointer to vinstr context.
+ * @setup:    Non-NULL pointer to hardware counter ioctl setup structure.
+ *            setup->buffer_count must not be 0.
+ * @out_vcli: Non-NULL pointer to where created client will be stored on
+ *            success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_vinstr_client_create(
+	struct kbase_vinstr_context *vctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup,
+	struct kbase_vinstr_client **out_vcli)
+{
+	int errcode;
+	struct kbase_vinstr_client *vcli;
+	struct kbase_hwcnt_physical_enable_map phys_em;
 
-	if (!timer) {
-		dev_warn(vinstr_ctx->kbdev->dev, "Timer allocation failed!\n");
+	WARN_ON(!vctx);
+	WARN_ON(!setup);
+	WARN_ON(setup->buffer_count == 0);
+
+	vcli = kzalloc(sizeof(*vcli), GFP_KERNEL);
+	if (!vcli)
 		return -ENOMEM;
-	}
 
-	hrtimer_init(&timer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	vcli->vctx = vctx;
 
-	timer->hrtimer.function = kbasep_vinstr_wake_up_callback;
-	timer->vinstr_ctx       = vinstr_ctx;
+	errcode = kbase_hwcnt_enable_map_alloc(
+		vctx->metadata, &vcli->enable_map);
+	if (errcode)
+		goto error;
 
-	while (!kthread_should_stop()) {
-		struct kbase_vinstr_client *cli = NULL;
-		struct kbase_vinstr_client *tmp;
-		int                        rcode;
-		unsigned long              flags;
+	phys_em.jm_bm = setup->jm_bm;
+	phys_em.shader_bm = setup->shader_bm;
+	phys_em.tiler_bm = setup->tiler_bm;
+	phys_em.mmu_l2_bm = setup->mmu_l2_bm;
+	kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em);
 
-		u64              timestamp = kbasep_vinstr_get_timestamp();
-		u64              dump_time = 0;
-		struct list_head expired_requests;
+	errcode = kbase_hwcnt_dump_buffer_array_alloc(
+		vctx->metadata, setup->buffer_count, &vcli->dump_bufs);
+	if (errcode)
+		goto error;
 
-		/* Hold lock while performing operations on lists of clients. */
-		mutex_lock(&vinstr_ctx->lock);
+	errcode = -ENOMEM;
+	vcli->dump_bufs_meta = kmalloc_array(
+		setup->buffer_count, sizeof(*vcli->dump_bufs_meta), GFP_KERNEL);
+	if (!vcli->dump_bufs_meta)
+		goto error;
 
-		/* Closing thread must not interact with client requests. */
-		if (current == vinstr_ctx->thread) {
-			atomic_set(&vinstr_ctx->request_pending, 0);
+	errcode = kbase_hwcnt_virtualizer_client_create(
+		vctx->hvirt, &vcli->enable_map, &vcli->hvcli);
+	if (errcode)
+		goto error;
 
-			spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-			if (!list_empty(&vinstr_ctx->waiting_clients)) {
-				cli = list_first_entry(
-						&vinstr_ctx->waiting_clients,
-						struct kbase_vinstr_client,
-						list);
-				dump_time = cli->dump_time;
-			}
-			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-		}
+	init_waitqueue_head(&vcli->waitq);
 
-		if (!cli || ((s64)timestamp - (s64)dump_time < 0ll)) {
-			mutex_unlock(&vinstr_ctx->lock);
+	*out_vcli = vcli;
+	return 0;
+error:
+	kbasep_vinstr_client_destroy(vcli);
+	return errcode;
+}
 
-			/* Sleep until next dumping event or service request. */
-			if (cli) {
-				u64 diff = dump_time - timestamp;
+int kbase_vinstr_init(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_vinstr_context **out_vctx)
+{
+	struct kbase_vinstr_context *vctx;
+	const struct kbase_hwcnt_metadata *metadata;
 
-				hrtimer_start(
-						&timer->hrtimer,
-						ns_to_ktime(diff),
-						HRTIMER_MODE_REL);
-			}
-			wait_event_interruptible(
-					vinstr_ctx->waitq,
-					atomic_read(
-						&vinstr_ctx->request_pending) ||
-					kthread_should_stop());
-			hrtimer_cancel(&timer->hrtimer);
-			continue;
-		}
+	if (!hvirt || !out_vctx)
+		return -EINVAL;
 
-		rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx,
-				&timestamp);
+	metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
+	if (!metadata)
+		return -EINVAL;
 
-		INIT_LIST_HEAD(&expired_requests);
+	vctx = kzalloc(sizeof(*vctx), GFP_KERNEL);
+	if (!vctx)
+		return -ENOMEM;
 
-		spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-		/* Find all expired requests. */
-		list_for_each_entry_safe(
-				cli,
-				tmp,
-				&vinstr_ctx->waiting_clients,
-				list) {
-			s64 tdiff =
-				(s64)(timestamp + DUMPING_RESOLUTION) -
-				(s64)cli->dump_time;
-			if (tdiff >= 0ll) {
-				list_del(&cli->list);
-				list_add(&cli->list, &expired_requests);
-			} else {
-				break;
-			}
-		}
+	vctx->hvirt = hvirt;
+	vctx->metadata = metadata;
 
-		/* Fill data for each request found. */
-		while (!list_empty(&expired_requests)) {
-			cli = list_first_entry(&expired_requests,
-					struct kbase_vinstr_client, list);
+	mutex_init(&vctx->lock);
+	INIT_LIST_HEAD(&vctx->clients);
+	hrtimer_init(&vctx->dump_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	vctx->dump_timer.function = kbasep_vinstr_dump_timer;
+	INIT_WORK(&vctx->dump_work, kbasep_vinstr_dump_worker);
 
-			/* Ensure that legacy buffer will not be used from
-			 * this kthread context. */
-			BUG_ON(0 == cli->buffer_count);
-			/* Expect only periodically sampled clients. */
-			BUG_ON(0 == cli->dump_interval);
-
-			/* Release the spinlock, as filling the data in client's
-			 * userspace buffer could result in page faults. */
-			spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-			if (!rcode)
-				kbasep_vinstr_update_client(
-						cli,
-						timestamp,
-						BASE_HWCNT_READER_EVENT_PERIODIC);
-			spin_lock_irqsave(&cli->vinstr_ctx->state_lock, flags);
-
-			/* This client got suspended, move to the next one. */
-			if (cli->suspended)
-				continue;
-
-			/* Set new dumping time. Drop missed probing times. */
-			do {
-				cli->dump_time += cli->dump_interval;
-			} while (cli->dump_time < timestamp);
-
-			list_del(&cli->list);
-			kbasep_vinstr_add_dump_request(
-					cli,
-					&vinstr_ctx->waiting_clients);
-		}
-		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-		/* Reprogram counters set if required. */
-		kbasep_vinstr_reprogram(vinstr_ctx);
-
-		mutex_unlock(&vinstr_ctx->lock);
-	}
-
-	kfree(timer);
-
+	*out_vctx = vctx;
 	return 0;
 }
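+
+/*
+ * Minimal usage sketch for a hypothetical caller (illustration only, not part
+ * of the driver):
+ *
+ *	struct kbase_vinstr_context *vinstr;
+ *	int err = kbase_vinstr_init(hvirt, &vinstr);
+ *
+ *	if (err)
+ *		return err;
+ *	...
+ *	kbase_vinstr_term(vinstr);
+ *
+ * where hvirt is the device's hardware counter virtualizer. No timer or
+ * worker runs until a client is attached and given a non-zero dump interval.
+ */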
 
-/*****************************************************************************/
+void kbase_vinstr_term(struct kbase_vinstr_context *vctx)
+{
+	if (!vctx)
+		return;
+
+	cancel_work_sync(&vctx->dump_work);
+
+	/* Non-zero client count implies client leak */
+	if (WARN_ON(vctx->client_count != 0)) {
+		struct kbase_vinstr_client *pos, *n;
+
+		list_for_each_entry_safe(pos, n, &vctx->clients, node) {
+			list_del(&pos->node);
+			vctx->client_count--;
+			kbasep_vinstr_client_destroy(pos);
+		}
+	}
+
+	WARN_ON(vctx->client_count != 0);
+	kfree(vctx);
+}
+
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx)
+{
+	if (WARN_ON(!vctx))
+		return;
+
+	mutex_lock(&vctx->lock);
+
+	if (!WARN_ON(vctx->suspend_count == SIZE_MAX))
+		vctx->suspend_count++;
+
+	mutex_unlock(&vctx->lock);
+
+	/* Always sync cancel the timer and then the worker, regardless of the
+	 * new suspend count.
+	 *
+	 * This ensures concurrent calls to kbase_vinstr_suspend() always block
+	 * until vinstr is fully suspended.
+	 *
+	 * The timer is cancelled before the worker, as the timer
+	 * unconditionally re-enqueues the worker, but the worker checks the
+	 * suspend_count that we just incremented before rescheduling the timer.
+	 *
+	 * Therefore if we cancel the worker first, the timer might re-enqueue
+	 * the worker before we cancel the timer, but the opposite is not
+	 * possible.
+	 */
+	hrtimer_cancel(&vctx->dump_timer);
+	cancel_work_sync(&vctx->dump_work);
+}
+
+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx)
+{
+	if (WARN_ON(!vctx))
+		return;
+
+	mutex_lock(&vctx->lock);
+
+	if (!WARN_ON(vctx->suspend_count == 0)) {
+		vctx->suspend_count--;
+
+		/* Last resume, so re-enqueue the worker if we have any periodic
+		 * clients.
+		 */
+		if (vctx->suspend_count == 0) {
+			struct kbase_vinstr_client *pos;
+			bool has_periodic_clients = false;
+
+			list_for_each_entry(pos, &vctx->clients, node) {
+				if (pos->dump_interval_ns != 0) {
+					has_periodic_clients = true;
+					break;
+				}
+			}
+
+			if (has_periodic_clients)
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+				queue_work(system_wq, &vctx->dump_work);
+#else
+				queue_work(system_highpri_wq, &vctx->dump_work);
+#endif
+		}
+	}
+
+	mutex_unlock(&vctx->lock);
+}
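+
+/*
+ * Illustrative call sequence: two nested kbase_vinstr_suspend() calls leave
+ * suspend_count at 2, so the first kbase_vinstr_resume() only decrements the
+ * count. Periodic dumping restarts (the worker is re-queued) only when the
+ * second resume brings the count back to 0 and at least one periodic client
+ * is attached.
+ */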
+
+int kbase_vinstr_hwcnt_reader_setup(
+	struct kbase_vinstr_context *vctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup)
+{
+	int errcode;
+	int fd;
+	struct kbase_vinstr_client *vcli = NULL;
+
+	if (!vctx || !setup ||
+	    (setup->buffer_count == 0) ||
+	    (setup->buffer_count > MAX_BUFFER_COUNT))
+		return -EINVAL;
+
+	errcode = kbasep_vinstr_client_create(vctx, setup, &vcli);
+	if (errcode)
+		goto error;
+
+	errcode = anon_inode_getfd(
+		"[mali_vinstr_desc]",
+		&vinstr_client_fops,
+		vcli,
+		O_RDONLY | O_CLOEXEC);
+	if (errcode < 0)
+		goto error;
+
+	fd = errcode;
+
+	/* Add the new client. No need to reschedule worker, as not periodic */
+	mutex_lock(&vctx->lock);
+
+	vctx->client_count++;
+	list_add(&vcli->node, &vctx->clients);
+
+	mutex_unlock(&vctx->lock);
+
+	return fd;
+error:
+	kbasep_vinstr_client_destroy(vcli);
+	return errcode;
+}
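+
+/*
+ * Illustrative setup call (hypothetical values, not taken from this patch):
+ *
+ *	struct kbase_ioctl_hwcnt_reader_setup setup = {
+ *		.buffer_count = 4,
+ *		.jm_bm = ~0u,
+ *		.shader_bm = ~0u,
+ *		.tiler_bm = ~0u,
+ *		.mmu_l2_bm = ~0u,
+ *	};
+ *	int fd = kbase_vinstr_hwcnt_reader_setup(vctx, &setup);
+ *
+ * A non-negative return value is a reader file descriptor backed by
+ * vinstr_client_fops; a negative value is an error code. The new client is
+ * non-periodic until it requests an interval via the reader's set-interval
+ * ioctl.
+ */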
 
 /**
- * kbasep_vinstr_hwcnt_reader_buffer_ready - check if client has ready buffers
- * @cli: pointer to vinstr client structure
+ * kbasep_vinstr_hwcnt_reader_buffer_ready() - Check if client has ready
+ *                                             buffers.
+ * @cli: Non-NULL pointer to vinstr client.
  *
- * Return: non-zero if client has at least one dumping buffer filled that was
- *         not notified to user yet
+ * Return: Non-zero if client has at least one dumping buffer filled that was
+ *         not notified to user yet.
  */
 static int kbasep_vinstr_hwcnt_reader_buffer_ready(
-		struct kbase_vinstr_client *cli)
+	struct kbase_vinstr_client *cli)
 {
-	KBASE_DEBUG_ASSERT(cli);
+	WARN_ON(!cli);
 	return atomic_read(&cli->write_idx) != atomic_read(&cli->meta_idx);
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer - hwcnt reader's ioctl command
- * @cli:    pointer to vinstr client structure
- * @buffer: pointer to userspace buffer
- * @size:   size of buffer
+ * kbasep_vinstr_hwcnt_reader_ioctl_dump() - Dump ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
  *
- * Return: zero on success
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_dump(
+	struct kbase_vinstr_client *cli)
+{
+	int errcode;
+
+	mutex_lock(&cli->vctx->lock);
+
+	errcode = kbasep_vinstr_client_dump(
+		cli, BASE_HWCNT_READER_EVENT_MANUAL);
+
+	mutex_unlock(&cli->vctx->lock);
+	return errcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_clear() - Clear ioctl command.
+ * @cli: Non-NULL pointer to vinstr client.
+ *
+ * Return: 0 on success, else error code.
+ */
+static long kbasep_vinstr_hwcnt_reader_ioctl_clear(
+	struct kbase_vinstr_client *cli)
+{
+	int errcode;
+
+	mutex_lock(&cli->vctx->lock);
+
+	errcode = kbasep_vinstr_client_clear(cli);
+
+	mutex_unlock(&cli->vctx->lock);
+	return errcode;
+}
+
+/**
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_buffer() - Get buffer ioctl command.
+ * @cli:    Non-NULL pointer to vinstr client.
+ * @buffer: Non-NULL pointer to userspace buffer.
+ * @size:   Size of buffer.
+ *
+ * Return: 0 on success, else error code.
  */
 static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
-		struct kbase_vinstr_client *cli, void __user *buffer,
-		size_t size)
+	struct kbase_vinstr_client *cli,
+	void __user *buffer,
+	size_t size)
 {
 	unsigned int meta_idx = atomic_read(&cli->meta_idx);
-	unsigned int idx = meta_idx % cli->buffer_count;
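+	/* Dump buffers form a ring indexed modulo the buffer count */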
+	unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt;
 
-	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_buffers_meta[idx];
+	struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx];
 
 	/* Metadata sanity check. */
-	KBASE_DEBUG_ASSERT(idx == meta->buffer_idx);
+	WARN_ON(idx != meta->buffer_idx);
 
 	if (sizeof(struct kbase_hwcnt_reader_metadata) != size)
 		return -EINVAL;
@@ -1470,19 +694,20 @@
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer - hwcnt reader's ioctl command
- * @cli:    pointer to vinstr client structure
- * @buffer: pointer to userspace buffer
- * @size:   size of buffer
+ * kbasep_vinstr_hwcnt_reader_ioctl_put_buffer() - Put buffer ioctl command.
+ * @cli:    Non-NULL pointer to vinstr client.
+ * @buffer: Non-NULL pointer to userspace buffer.
+ * @size:   Size of buffer.
  *
- * Return: zero on success
+ * Return: 0 on success, else error code.
  */
 static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
-		struct kbase_vinstr_client *cli, void __user *buffer,
-		size_t size)
+	struct kbase_vinstr_client *cli,
+	void __user *buffer,
+	size_t size)
 {
 	unsigned int read_idx = atomic_read(&cli->read_idx);
-	unsigned int idx = read_idx % cli->buffer_count;
+	unsigned int idx = read_idx % cli->dump_bufs.buf_cnt;
 
 	struct kbase_hwcnt_reader_metadata meta;
 
@@ -1505,182 +730,126 @@
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_ioctl_set_interval - hwcnt reader's ioctl command
- * @cli:      pointer to vinstr client structure
- * @interval: periodic dumping interval (disable periodic dumping if zero)
+ * kbasep_vinstr_hwcnt_reader_ioctl_set_interval() - Set interval ioctl command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @interval: Periodic dumping interval (disable periodic dumping if 0).
  *
- * Return: zero on success
+ * Return: 0 always.
  */
 static long kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
-		struct kbase_vinstr_client *cli, u32 interval)
+	struct kbase_vinstr_client *cli,
+	u32 interval)
 {
-	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
-	unsigned long flags;
+	mutex_lock(&cli->vctx->lock);
 
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
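+	/* Clamp non-zero intervals to the minimum supported dump interval */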
+	if ((interval != 0) && (interval < DUMP_INTERVAL_MIN_NS))
+		interval = DUMP_INTERVAL_MIN_NS;
+	/* Update the interval, and put in a dummy next dump time */
+	cli->dump_interval_ns = interval;
+	cli->next_dump_time_ns = 0;
 
-	mutex_lock(&vinstr_ctx->lock);
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
+	/*
+	 * If it's a periodic client, kick off the worker early to do a proper
+	 * timer reschedule. Return value is ignored, as we don't care if the
+	 * worker is already queued.
+	 */
+	if ((interval != 0) && (cli->vctx->suspend_count == 0))
+#if KERNEL_VERSION(3, 16, 0) > LINUX_VERSION_CODE
+		queue_work(system_wq, &cli->vctx->dump_work);
+#else
+		queue_work(system_highpri_wq, &cli->vctx->dump_work);
+#endif
 
-	if (cli->suspended) {
-		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-		mutex_unlock(&vinstr_ctx->lock);
-		return -ENOMEM;
-	}
-
-	list_del(&cli->list);
-
-	cli->dump_interval = interval;
-
-	/* If interval is non-zero, enable periodic dumping for this client. */
-	if (cli->dump_interval) {
-		if (DUMPING_RESOLUTION > cli->dump_interval)
-			cli->dump_interval = DUMPING_RESOLUTION;
-		cli->dump_time =
-			kbasep_vinstr_get_timestamp() + cli->dump_interval;
-
-		kbasep_vinstr_add_dump_request(
-				cli, &vinstr_ctx->waiting_clients);
-
-		atomic_set(&vinstr_ctx->request_pending, 1);
-		wake_up_all(&vinstr_ctx->waitq);
-	} else {
-		list_add(&cli->list, &vinstr_ctx->idle_clients);
-	}
-
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-	mutex_unlock(&vinstr_ctx->lock);
+	mutex_unlock(&cli->vctx->lock);
 
 	return 0;
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_event_mask - return event mask for event id
- * @event_id: id of event
- * Return: event_mask or zero if event is not supported or maskable
- */
-static u32 kbasep_vinstr_hwcnt_reader_event_mask(
-		enum base_hwcnt_reader_event event_id)
-{
-	u32 event_mask = 0;
-
-	switch (event_id) {
-	case BASE_HWCNT_READER_EVENT_PREJOB:
-	case BASE_HWCNT_READER_EVENT_POSTJOB:
-		/* These event are maskable. */
-		event_mask = (1 << event_id);
-		break;
-
-	case BASE_HWCNT_READER_EVENT_MANUAL:
-	case BASE_HWCNT_READER_EVENT_PERIODIC:
-		/* These event are non-maskable. */
-	default:
-		/* These event are not supported. */
-		break;
-	}
-
-	return event_mask;
-}
-
-/**
- * kbasep_vinstr_hwcnt_reader_ioctl_enable_event - hwcnt reader's ioctl command
- * @cli:      pointer to vinstr client structure
- * @event_id: id of event to enable
+ * kbasep_vinstr_hwcnt_reader_ioctl_enable_event() - Enable event ioctl command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to enable.
  *
- * Return: zero on success
+ * Return: 0 always.
  */
 static long kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
 		struct kbase_vinstr_client *cli,
 		enum base_hwcnt_reader_event event_id)
 {
-	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
-	u32                         event_mask;
-
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
-	if (!event_mask)
-		return -EINVAL;
-
-	mutex_lock(&vinstr_ctx->lock);
-	cli->event_mask |= event_mask;
-	mutex_unlock(&vinstr_ctx->lock);
-
+	/* No-op, as events aren't supported */
 	return 0;
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_ioctl_disable_event - hwcnt reader's ioctl command
- * @cli:      pointer to vinstr client structure
- * @event_id: id of event to disable
+ * kbasep_vinstr_hwcnt_reader_ioctl_disable_event() - Disable event ioctl
+ *                                                    command.
+ * @cli:      Non-NULL pointer to vinstr client.
+ * @event_id: ID of event to disable.
  *
- * Return: zero on success
+ * Return: 0 always.
  */
 static long kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
-		struct kbase_vinstr_client *cli,
-		enum base_hwcnt_reader_event event_id)
+	struct kbase_vinstr_client *cli,
+	enum base_hwcnt_reader_event event_id)
 {
-	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
-	u32                         event_mask;
-
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	event_mask = kbasep_vinstr_hwcnt_reader_event_mask(event_id);
-	if (!event_mask)
-		return -EINVAL;
-
-	mutex_lock(&vinstr_ctx->lock);
-	cli->event_mask &= ~event_mask;
-	mutex_unlock(&vinstr_ctx->lock);
-
+	/* No-op, as events aren't supported */
 	return 0;
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver - hwcnt reader's ioctl command
- * @cli:   pointer to vinstr client structure
- * @hwver: pointer to user buffer where hw version will be stored
+ * kbasep_vinstr_hwcnt_reader_ioctl_get_hwver() - Get HW version ioctl command.
+ * @cli:   Non-NULL pointer to vinstr client.
+ * @hwver: Non-NULL pointer to user buffer where HW version will be stored.
  *
- * Return: zero on success
+ * Return: 0 on success, else error code.
  */
 static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
-		struct kbase_vinstr_client *cli, u32 __user *hwver)
+	struct kbase_vinstr_client *cli,
+	u32 __user *hwver)
 {
-#ifndef CONFIG_MALI_NO_MALI
-	struct kbase_vinstr_context *vinstr_ctx = cli->vinstr_ctx;
-#endif
+	u32 ver = 0;
+	const enum kbase_hwcnt_gpu_group_type type =
+		kbase_hwcnt_metadata_group_type(cli->vctx->metadata, 0);
 
-	u32                         ver = 5;
-
-#ifndef CONFIG_MALI_NO_MALI
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-	if (kbase_hw_has_feature(vinstr_ctx->kbdev, BASE_HW_FEATURE_V4))
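+	/* Derive the counter version from the first counter group's type */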
+	switch (type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V4:
 		ver = 4;
-#endif
+		break;
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		ver = 5;
+		break;
+	default:
+		WARN_ON(true);
+	}
 
-	return put_user(ver, hwver);
+	if (ver != 0)
+		return put_user(ver, hwver);
+	else
+		return -EINVAL;
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_ioctl - hwcnt reader's ioctl
- * @filp:   pointer to file structure
- * @cmd:    user command
- * @arg:    command's argument
+ * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
+ * @filp:   Non-NULL pointer to file structure.
+ * @cmd:    User command.
+ * @arg:    Command's argument.
  *
- * Return: zero on success
+ * Return: 0 on success, else error code.
  */
-static long kbasep_vinstr_hwcnt_reader_ioctl(struct file *filp,
-		unsigned int cmd, unsigned long arg)
+static long kbasep_vinstr_hwcnt_reader_ioctl(
+	struct file *filp,
+	unsigned int cmd,
+	unsigned long arg)
 {
-	long                       rcode = 0;
+	long rcode;
 	struct kbase_vinstr_client *cli;
 
-	KBASE_DEBUG_ASSERT(filp);
+	if (!filp || (_IOC_TYPE(cmd) != KBASE_HWCNT_READER))
+		return -EINVAL;
 
 	cli = filp->private_data;
-	KBASE_DEBUG_ASSERT(cli);
-
-	if (unlikely(KBASE_HWCNT_READER != _IOC_TYPE(cmd)))
+	if (!cli)
 		return -EINVAL;
 
 	switch (cmd) {
@@ -1689,42 +858,41 @@
 		break;
 	case KBASE_HWCNT_READER_GET_HWVER:
 		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
-				cli, (u32 __user *)arg);
+			cli, (u32 __user *)arg);
 		break;
 	case KBASE_HWCNT_READER_GET_BUFFER_SIZE:
-		KBASE_DEBUG_ASSERT(cli->vinstr_ctx);
 		rcode = put_user(
-				(u32)cli->vinstr_ctx->dump_size,
-				(u32 __user *)arg);
+			(u32)cli->vctx->metadata->dump_buf_bytes,
+			(u32 __user *)arg);
 		break;
 	case KBASE_HWCNT_READER_DUMP:
-		rcode = kbase_vinstr_hwc_dump(
-				cli, BASE_HWCNT_READER_EVENT_MANUAL);
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli);
 		break;
 	case KBASE_HWCNT_READER_CLEAR:
-		rcode = kbase_vinstr_hwc_clear(cli);
+		rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli);
 		break;
 	case KBASE_HWCNT_READER_GET_BUFFER:
 		rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
-				cli, (void __user *)arg, _IOC_SIZE(cmd));
+			cli, (void __user *)arg, _IOC_SIZE(cmd));
 		break;
 	case KBASE_HWCNT_READER_PUT_BUFFER:
 		rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
-				cli, (void __user *)arg, _IOC_SIZE(cmd));
+			cli, (void __user *)arg, _IOC_SIZE(cmd));
 		break;
 	case KBASE_HWCNT_READER_SET_INTERVAL:
 		rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval(
-				cli, (u32)arg);
+			cli, (u32)arg);
 		break;
 	case KBASE_HWCNT_READER_ENABLE_EVENT:
 		rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event(
-				cli, (enum base_hwcnt_reader_event)arg);
+			cli, (enum base_hwcnt_reader_event)arg);
 		break;
 	case KBASE_HWCNT_READER_DISABLE_EVENT:
 		rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event(
-				cli, (enum base_hwcnt_reader_event)arg);
+			cli, (enum base_hwcnt_reader_event)arg);
 		break;
 	default:
+		WARN_ON(true);
 		rcode = -EINVAL;
 		break;
 	}
@@ -1733,21 +901,25 @@
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_poll - hwcnt reader's poll
- * @filp: pointer to file structure
- * @wait: pointer to poll table
- * Return: POLLIN if data can be read without blocking, otherwise zero
+ * kbasep_vinstr_hwcnt_reader_poll() - hwcnt reader's poll.
+ * @filp: Non-NULL pointer to file structure.
+ * @wait: Non-NULL pointer to poll table.
+ *
+ * Return: POLLIN if data can be read without blocking, 0 if data cannot be
+ *         read without blocking, else error code.
  */
-static unsigned int kbasep_vinstr_hwcnt_reader_poll(struct file *filp,
-		poll_table *wait)
+static unsigned int kbasep_vinstr_hwcnt_reader_poll(
+	struct file *filp,
+	poll_table *wait)
 {
 	struct kbase_vinstr_client *cli;
 
-	KBASE_DEBUG_ASSERT(filp);
-	KBASE_DEBUG_ASSERT(wait);
+	if (!filp || !wait)
+		return -EINVAL;
 
 	cli = filp->private_data;
-	KBASE_DEBUG_ASSERT(cli);
+	if (!cli)
+		return -EINVAL;
 
 	poll_wait(filp, &cli->waitq, wait);
 	if (kbasep_vinstr_hwcnt_reader_buffer_ready(cli))
@@ -1756,25 +928,28 @@
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_mmap - hwcnt reader's mmap
- * @filp: pointer to file structure
- * @vma:  pointer to vma structure
- * Return: zero on success
+ * kbasep_vinstr_hwcnt_reader_mmap() - hwcnt reader's mmap.
+ * @filp: Non-NULL pointer to file structure.
+ * @vma:  Non-NULL pointer to vma structure.
+ *
+ * Return: 0 on success, else error code.
  */
-static int kbasep_vinstr_hwcnt_reader_mmap(struct file *filp,
-		struct vm_area_struct *vma)
+static int kbasep_vinstr_hwcnt_reader_mmap(
+	struct file *filp,
+	struct vm_area_struct *vma)
 {
 	struct kbase_vinstr_client *cli;
-	unsigned long size, addr, pfn, offset;
-	unsigned long vm_size = vma->vm_end - vma->vm_start;
+	unsigned long vm_size, size, addr, pfn, offset;
 
-	KBASE_DEBUG_ASSERT(filp);
-	KBASE_DEBUG_ASSERT(vma);
+	if (!filp || !vma)
+		return -EINVAL;
 
 	cli = filp->private_data;
-	KBASE_DEBUG_ASSERT(cli);
+	if (!cli)
+		return -EINVAL;
 
-	size = cli->buffer_count * cli->dump_size;
+	vm_size = vma->vm_end - vma->vm_start;
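+	/* The mappable region spans all of the client's dump buffers */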
+	size = cli->dump_bufs.buf_cnt * cli->vctx->metadata->dump_buf_bytes;
 
 	if (vma->vm_pgoff > (size >> PAGE_SHIFT))
 		return -EINVAL;
@@ -1783,579 +958,33 @@
 	if (vm_size > size - offset)
 		return -EINVAL;
 
-	addr = __pa((unsigned long)cli->dump_buffers + offset);
+	addr = __pa(cli->dump_bufs.page_addr + offset);
 	pfn = addr >> PAGE_SHIFT;
 
 	return remap_pfn_range(
-			vma,
-			vma->vm_start,
-			pfn,
-			vm_size,
-			vma->vm_page_prot);
+		vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
 }
 
 /**
- * kbasep_vinstr_hwcnt_reader_release - hwcnt reader's release
- * @inode: pointer to inode structure
- * @filp:  pointer to file structure
- * Return always return zero
+ * kbasep_vinstr_hwcnt_reader_release() - hwcnt reader's release.
+ * @inode: Non-NULL pointer to inode structure.
+ * @filp:  Non-NULL pointer to file structure.
+ *
+ * Return: 0 always.
  */
 static int kbasep_vinstr_hwcnt_reader_release(struct inode *inode,
-		struct file *filp)
+	struct file *filp)
 {
-	struct kbase_vinstr_client *cli;
+	struct kbase_vinstr_client *vcli = filp->private_data;
 
-	KBASE_DEBUG_ASSERT(inode);
-	KBASE_DEBUG_ASSERT(filp);
+	mutex_lock(&vcli->vctx->lock);
 
-	cli = filp->private_data;
-	KBASE_DEBUG_ASSERT(cli);
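+	/* Unlink the client from the context before destroying it */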
+	vcli->vctx->client_count--;
+	list_del(&vcli->node);
 
-	kbase_vinstr_detach_client(cli);
-	return 0;
-}
+	mutex_unlock(&vcli->vctx->lock);
 
-/*****************************************************************************/
-
-/**
- * kbasep_vinstr_kick_scheduler - trigger scheduler cycle
- * @kbdev: pointer to kbase device structure
- */
-static void kbasep_vinstr_kick_scheduler(struct kbase_device *kbdev)
-{
-	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
-	unsigned long flags;
-
-	down(&js_devdata->schedule_sem);
-	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
-	kbase_backend_slot_update(kbdev);
-	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-	up(&js_devdata->schedule_sem);
-}
-
-/**
- * kbasep_vinstr_suspend_worker - worker suspending vinstr module
- * @data: pointer to work structure
- */
-static void kbasep_vinstr_suspend_worker(struct work_struct *data)
-{
-	struct kbase_vinstr_context *vinstr_ctx;
-	unsigned long flags;
-
-	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
-			suspend_work);
-
-	mutex_lock(&vinstr_ctx->lock);
-
-	if (vinstr_ctx->kctx)
-		disable_hwcnt(vinstr_ctx);
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	vinstr_ctx->state = VINSTR_SUSPENDED;
-	wake_up_all(&vinstr_ctx->suspend_waitq);
-
-	if (vinstr_ctx->need_resume) {
-		vinstr_ctx->need_resume = false;
-		vinstr_ctx->state = VINSTR_RESUMING;
-		schedule_work(&vinstr_ctx->resume_work);
-
-		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-		mutex_unlock(&vinstr_ctx->lock);
-	} else {
-		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-		mutex_unlock(&vinstr_ctx->lock);
-
-		/* Kick GPU scheduler to allow entering protected mode.
-		 * This must happen after vinstr was suspended.
-		 */
-		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
-	}
-}
-
-/**
- * kbasep_vinstr_resume_worker - worker resuming vinstr module
- * @data: pointer to work structure
- */
-static void kbasep_vinstr_resume_worker(struct work_struct *data)
-{
-	struct kbase_vinstr_context *vinstr_ctx;
-	unsigned long flags;
-
-	vinstr_ctx = container_of(data, struct kbase_vinstr_context,
-			resume_work);
-
-	mutex_lock(&vinstr_ctx->lock);
-
-	if (vinstr_ctx->kctx)
-		enable_hwcnt(vinstr_ctx);
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	vinstr_ctx->state = VINSTR_IDLE;
-	wake_up_all(&vinstr_ctx->suspend_waitq);
-
-	if (vinstr_ctx->need_suspend) {
-		vinstr_ctx->need_suspend = false;
-		vinstr_ctx->state = VINSTR_SUSPENDING;
-		schedule_work(&vinstr_ctx->suspend_work);
-
-		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-		mutex_unlock(&vinstr_ctx->lock);
-	} else {
-		spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-		mutex_unlock(&vinstr_ctx->lock);
-
-		/* Kick GPU scheduler to allow entering protected mode.
-		 * Note that scheduler state machine might requested re-entry to
-		 * protected mode before vinstr was resumed.
-		 * This must happen after vinstr was release.
-		 */
-		kbasep_vinstr_kick_scheduler(vinstr_ctx->kbdev);
-	}
-}
-
-/*****************************************************************************/
-
-struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
-{
-	struct kbase_vinstr_context *vinstr_ctx;
-
-	vinstr_ctx = kzalloc(sizeof(*vinstr_ctx), GFP_KERNEL);
-	if (!vinstr_ctx)
-		return NULL;
-
-	INIT_LIST_HEAD(&vinstr_ctx->idle_clients);
-	INIT_LIST_HEAD(&vinstr_ctx->waiting_clients);
-	INIT_LIST_HEAD(&vinstr_ctx->suspended_clients);
-	mutex_init(&vinstr_ctx->lock);
-	spin_lock_init(&vinstr_ctx->state_lock);
-	vinstr_ctx->kbdev = kbdev;
-	vinstr_ctx->thread = NULL;
-	vinstr_ctx->state = VINSTR_IDLE;
-	vinstr_ctx->suspend_cnt = 0;
-	INIT_WORK(&vinstr_ctx->suspend_work, kbasep_vinstr_suspend_worker);
-	INIT_WORK(&vinstr_ctx->resume_work, kbasep_vinstr_resume_worker);
-	init_waitqueue_head(&vinstr_ctx->suspend_waitq);
-
-	atomic_set(&vinstr_ctx->request_pending, 0);
-	init_waitqueue_head(&vinstr_ctx->waitq);
-
-	return vinstr_ctx;
-}
-
-void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx)
-{
-	struct kbase_vinstr_client *cli;
-
-	/* Stop service thread first. */
-	if (vinstr_ctx->thread)
-		kthread_stop(vinstr_ctx->thread);
-
-	/* Wait for workers. */
-	flush_work(&vinstr_ctx->suspend_work);
-	flush_work(&vinstr_ctx->resume_work);
-
-	while (1) {
-		struct list_head *list = &vinstr_ctx->idle_clients;
-
-		if (list_empty(list)) {
-			list = &vinstr_ctx->waiting_clients;
-			if (list_empty(list)) {
-				list = &vinstr_ctx->suspended_clients;
-				if (list_empty(list))
-					break;
-			}
-		}
-
-		cli = list_first_entry(list, struct kbase_vinstr_client, list);
-		list_del(&cli->list);
-		if (!cli->suspended)
-			vinstr_ctx->nclients--;
-		else
-			vinstr_ctx->nclients_suspended--;
-		kfree(cli->accum_buffer);
-		kfree(cli);
-	}
-	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients);
-	KBASE_DEBUG_ASSERT(!vinstr_ctx->nclients_suspended);
-	if (vinstr_ctx->kctx)
-		kbasep_vinstr_destroy_kctx(vinstr_ctx);
-	kfree(vinstr_ctx);
-}
-
-int kbase_vinstr_hwcnt_reader_setup(struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_ioctl_hwcnt_reader_setup *setup)
-{
-	struct kbase_vinstr_client  *cli;
-	u32                         bitmap[4];
-	int                         fd;
-
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-	KBASE_DEBUG_ASSERT(setup);
-	KBASE_DEBUG_ASSERT(setup->buffer_count);
-
-	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
-	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
-	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
-	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
-
-	cli = kbasep_vinstr_attach_client(
-			vinstr_ctx,
-			setup->buffer_count,
-			bitmap,
-			&fd,
-			NULL);
-
-	if (!cli)
-		return -ENOMEM;
-
-	kbase_vinstr_wait_for_ready(vinstr_ctx);
-	return fd;
-}
-
-int kbase_vinstr_legacy_hwc_setup(
-		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_vinstr_client  **cli,
-		struct kbase_ioctl_hwcnt_enable *enable)
-{
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-	KBASE_DEBUG_ASSERT(enable);
-	KBASE_DEBUG_ASSERT(cli);
-
-	if (enable->dump_buffer) {
-		u32 bitmap[4];
-
-		bitmap[SHADER_HWCNT_BM] = enable->shader_bm;
-		bitmap[TILER_HWCNT_BM]  = enable->tiler_bm;
-		bitmap[MMU_L2_HWCNT_BM] = enable->mmu_l2_bm;
-		bitmap[JM_HWCNT_BM]     = enable->jm_bm;
-
-		if (*cli)
-			return -EBUSY;
-
-		*cli = kbasep_vinstr_attach_client(
-				vinstr_ctx,
-				0,
-				bitmap,
-				(void *)(uintptr_t)enable->dump_buffer,
-				NULL);
-
-		if (!(*cli))
-			return -ENOMEM;
-
-		kbase_vinstr_wait_for_ready(vinstr_ctx);
-	} else {
-		if (!*cli)
-			return -EINVAL;
-
-		kbase_vinstr_detach_client(*cli);
-		*cli = NULL;
-	}
+	kbasep_vinstr_client_destroy(vcli);
 
 	return 0;
 }
-
-struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
-		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_ioctl_hwcnt_reader_setup *setup,
-		void *kernel_buffer)
-{
-	struct kbase_vinstr_client *kernel_client;
-	u32 bitmap[4];
-
-	if (!vinstr_ctx || !setup || !kernel_buffer)
-		return NULL;
-
-	bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
-	bitmap[TILER_HWCNT_BM]  = setup->tiler_bm;
-	bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
-	bitmap[JM_HWCNT_BM]     = setup->jm_bm;
-
-	kernel_client = kbasep_vinstr_attach_client(
-				vinstr_ctx,
-				0,
-				bitmap,
-				NULL,
-				kernel_buffer);
-
-	if (kernel_client)
-		kbase_vinstr_wait_for_ready(vinstr_ctx);
-
-	return kernel_client;
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_hwcnt_kernel_setup);
-
-int kbase_vinstr_hwc_dump(struct kbase_vinstr_client *cli,
-		enum base_hwcnt_reader_event event_id)
-{
-	int                         rcode = 0;
-	struct kbase_vinstr_context *vinstr_ctx;
-	u64                         timestamp;
-	u32                         event_mask;
-
-	if (!cli)
-		return -EINVAL;
-
-	vinstr_ctx = cli->vinstr_ctx;
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	KBASE_DEBUG_ASSERT(event_id < BASE_HWCNT_READER_EVENT_COUNT);
-	event_mask = 1 << event_id;
-
-	mutex_lock(&vinstr_ctx->lock);
-
-	if (event_mask & cli->event_mask) {
-		rcode = kbasep_vinstr_collect_and_accumulate(
-				vinstr_ctx,
-				&timestamp);
-		if (rcode)
-			goto exit;
-
-		rcode = kbasep_vinstr_update_client(cli, timestamp, event_id);
-		if (rcode)
-			goto exit;
-
-		kbasep_vinstr_reprogram(vinstr_ctx);
-	}
-
-exit:
-	mutex_unlock(&vinstr_ctx->lock);
-
-	return rcode;
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_hwc_dump);
-
-int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli)
-{
-	struct kbase_vinstr_context *vinstr_ctx;
-	int                         rcode;
-	u64                         unused;
-
-	if (!cli)
-		return -EINVAL;
-
-	vinstr_ctx = cli->vinstr_ctx;
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	mutex_lock(&vinstr_ctx->lock);
-
-	rcode = kbasep_vinstr_collect_and_accumulate(vinstr_ctx, &unused);
-	if (rcode)
-		goto exit;
-	rcode = kbase_instr_hwcnt_clear(vinstr_ctx->kctx);
-	if (rcode)
-		goto exit;
-	memset(cli->accum_buffer, 0, cli->dump_size);
-
-	kbasep_vinstr_reprogram(vinstr_ctx);
-
-exit:
-	mutex_unlock(&vinstr_ctx->lock);
-
-	return rcode;
-}
-
-int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx)
-{
-	unsigned long flags;
-	int ret = -EAGAIN;
-
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	vinstr_ctx->forced_suspend = true;
-	switch (vinstr_ctx->state) {
-	case VINSTR_SUSPENDED:
-		vinstr_ctx->suspend_cnt++;
-		/* overflow shall not happen */
-		BUG_ON(0 == vinstr_ctx->suspend_cnt);
-		ret = 0;
-		break;
-
-	case VINSTR_IDLE:
-		if (vinstr_ctx->clients_present) {
-			vinstr_ctx->state = VINSTR_SUSPENDING;
-			schedule_work(&vinstr_ctx->suspend_work);
-		} else {
-			vinstr_ctx->state = VINSTR_SUSPENDED;
-
-			vinstr_ctx->suspend_cnt++;
-			/* overflow shall not happen */
-			WARN_ON(0 == vinstr_ctx->suspend_cnt);
-			ret = 0;
-		}
-		break;
-
-	case VINSTR_DUMPING:
-		vinstr_ctx->state = VINSTR_SUSPENDING;
-		break;
-
-	case VINSTR_RESUMING:
-		vinstr_ctx->need_suspend = true;
-		break;
-
-	case VINSTR_SUSPENDING:
-		break;
-
-	default:
-		KBASE_DEBUG_ASSERT(0);
-		break;
-	}
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	return ret;
-}
-
-static int kbase_vinstr_is_ready(struct kbase_vinstr_context *vinstr_ctx)
-{
-	unsigned long flags;
-	int ret = -EAGAIN;
-
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	switch (vinstr_ctx->state) {
-	case VINSTR_SUSPENDED:
-	case VINSTR_RESUMING:
-	case VINSTR_SUSPENDING:
-		break;
-
-	case VINSTR_IDLE:
-	case VINSTR_DUMPING:
-		ret = 0;
-		break;
-	default:
-		KBASE_DEBUG_ASSERT(0);
-		break;
-	}
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-
-	return ret;
-}
-
-void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx)
-{
-	wait_event(vinstr_ctx->suspend_waitq,
-			(0 == kbase_vinstr_try_suspend(vinstr_ctx)));
-}
-
-void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx)
-{
-	wait_event(vinstr_ctx->suspend_waitq,
-			(0 == kbase_vinstr_is_ready(vinstr_ctx)));
-}
-KBASE_EXPORT_TEST_API(kbase_vinstr_wait_for_ready);
-
-/**
- * kbase_vinstr_update_suspend - Update vinstr suspend/resume status depending
- *                               on nclients
- * @vinstr_ctx: vinstr context pointer
- *
- * This function should be called whenever vinstr_ctx->nclients changes. This
- * may cause vinstr to be suspended or resumed, depending on the number of
- * clients and whether IPA is suspended or not.
- */
-static void kbase_vinstr_update_suspend(struct kbase_vinstr_context *vinstr_ctx)
-{
-	lockdep_assert_held(&vinstr_ctx->state_lock);
-
-	switch (vinstr_ctx->state) {
-	case VINSTR_SUSPENDED:
-		if ((vinstr_ctx->nclients) && (0 == vinstr_ctx->suspend_cnt)) {
-			vinstr_ctx->state = VINSTR_RESUMING;
-			schedule_work(&vinstr_ctx->resume_work);
-		}
-		break;
-
-	case VINSTR_SUSPENDING:
-		if ((vinstr_ctx->nclients) && (!vinstr_ctx->forced_suspend))
-			vinstr_ctx->need_resume = true;
-		break;
-
-	case VINSTR_IDLE:
-		if (!vinstr_ctx->nclients) {
-			vinstr_ctx->state = VINSTR_SUSPENDING;
-			schedule_work(&vinstr_ctx->suspend_work);
-		}
-		break;
-
-	case VINSTR_DUMPING:
-		if (!vinstr_ctx->nclients)
-			vinstr_ctx->state = VINSTR_SUSPENDING;
-		break;
-
-	case VINSTR_RESUMING:
-		if (!vinstr_ctx->nclients)
-			vinstr_ctx->need_suspend = true;
-		break;
-	}
-}
-
-void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx)
-{
-	unsigned long flags;
-
-	KBASE_DEBUG_ASSERT(vinstr_ctx);
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-	BUG_ON(VINSTR_SUSPENDING == vinstr_ctx->state);
-	if (VINSTR_SUSPENDED == vinstr_ctx->state) {
-		BUG_ON(0 == vinstr_ctx->suspend_cnt);
-		vinstr_ctx->suspend_cnt--;
-		if (0 == vinstr_ctx->suspend_cnt) {
-			vinstr_ctx->forced_suspend = false;
-			if (vinstr_ctx->clients_present) {
-				vinstr_ctx->state = VINSTR_RESUMING;
-				schedule_work(&vinstr_ctx->resume_work);
-			} else {
-				vinstr_ctx->state = VINSTR_IDLE;
-			}
-		}
-	}
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-}
-
-void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client)
-{
-	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-
-	if (!client->suspended) {
-		list_del(&client->list);
-		list_add(&client->list, &vinstr_ctx->suspended_clients);
-
-		vinstr_ctx->nclients--;
-		vinstr_ctx->nclients_suspended++;
-		kbase_vinstr_update_suspend(vinstr_ctx);
-
-		client->suspended = true;
-	}
-
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-}
-
-void kbase_vinstr_resume_client(struct kbase_vinstr_client *client)
-{
-	struct kbase_vinstr_context *vinstr_ctx = client->vinstr_ctx;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vinstr_ctx->state_lock, flags);
-
-	if (client->suspended) {
-		list_del(&client->list);
-		list_add(&client->list, &vinstr_ctx->idle_clients);
-
-		vinstr_ctx->nclients++;
-		vinstr_ctx->nclients_suspended--;
-		kbase_vinstr_update_suspend(vinstr_ctx);
-
-		client->suspended = false;
-	}
-
-	spin_unlock_irqrestore(&vinstr_ctx->state_lock, flags);
-}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
index d32799f..81d315f 100644
--- a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -20,163 +20,72 @@
  *
  */
 
+/*
+ * Vinstr, used to provide an ioctl for userspace access to periodic and
+ * manual hardware counter dumps.
+ */
+
 #ifndef _KBASE_VINSTR_H_
 #define _KBASE_VINSTR_H_
 
-#include <mali_kbase_hwcnt_reader.h>
-#include <mali_kbase_ioctl.h>
-
-/*****************************************************************************/
-
 struct kbase_vinstr_context;
-struct kbase_vinstr_client;
-
-/*****************************************************************************/
+struct kbase_hwcnt_virtualizer;
+struct kbase_ioctl_hwcnt_reader_setup;
 
 /**
- * kbase_vinstr_init() - initialize the vinstr core
- * @kbdev: kbase device
+ * kbase_vinstr_init() - Initialise a vinstr context.
+ * @hvirt:    Non-NULL pointer to the hardware counter virtualizer.
+ * @out_vctx: Non-NULL pointer to where the pointer to the created vinstr
+ *            context will be stored on success.
  *
- * Return: pointer to the vinstr context on success or NULL on failure
+ * On creation, the suspend count of the context will be 0.
+ *
+ * Return: 0 on success, else error code.
  */
-struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+int kbase_vinstr_init(
+	struct kbase_hwcnt_virtualizer *hvirt,
+	struct kbase_vinstr_context **out_vctx);
 
 /**
- * kbase_vinstr_term() - terminate the vinstr core
- * @vinstr_ctx: vinstr context
+ * kbase_vinstr_term() - Terminate a vinstr context.
+ * @vctx: Pointer to the vinstr context to be terminated.
  */
-void kbase_vinstr_term(struct kbase_vinstr_context *vinstr_ctx);
+void kbase_vinstr_term(struct kbase_vinstr_context *vctx);
 
 /**
- * kbase_vinstr_hwcnt_reader_setup - configure hw counters reader
- * @vinstr_ctx: vinstr context
- * @setup:      reader's configuration
+ * kbase_vinstr_suspend() - Increment the suspend count of the context.
+ * @vctx: Non-NULL pointer to the vinstr context to be suspended.
  *
- * Return: file descriptor on success and a (negative) error code otherwise
+ * After this function call returns, it is guaranteed that all timers and
+ * workers in vinstr will be cancelled, and will not be re-triggered until
+ * after the context has been resumed. In effect, this means no new counter
+ * dumps will occur for any existing or subsequently added periodic clients.
  */
-int kbase_vinstr_hwcnt_reader_setup(
-		struct kbase_vinstr_context        *vinstr_ctx,
-		struct kbase_ioctl_hwcnt_reader_setup *setup);
+void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx);
 
 /**
- * kbase_vinstr_legacy_hwc_setup - configure hw counters for dumping
- * @vinstr_ctx: vinstr context
- * @cli:        pointer where to store pointer to new vinstr client structure
- * @enable:      hwc configuration
+ * kbase_vinstr_resume() - Decrement the suspend count of the context.
+ * @vctx: Non-NULL pointer to the vinstr context to be resumed.
  *
- * Return: zero on success
- */
-int kbase_vinstr_legacy_hwc_setup(
-		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_vinstr_client  **cli,
-		struct kbase_ioctl_hwcnt_enable *enable);
-
-/**
- * kbase_vinstr_hwcnt_kernel_setup - configure hw counters for kernel side
- *                                   client
- * @vinstr_ctx:    vinstr context
- * @setup:         reader's configuration
- * @kernel_buffer: pointer to dump buffer
+ * If a call to this function decrements the suspend count from 1 to 0, then
+ * normal operation of vinstr will be resumed (i.e. counter dumps will once
+ * again be automatically triggered for all periodic clients).
  *
- * setup->buffer_count is not used for kernel side clients.
- *
- * Return: pointer to client structure, or NULL on failure
- */
-struct kbase_vinstr_client *kbase_vinstr_hwcnt_kernel_setup(
-		struct kbase_vinstr_context *vinstr_ctx,
-		struct kbase_ioctl_hwcnt_reader_setup *setup,
-		void *kernel_buffer);
-
-/**
- * kbase_vinstr_hwc_dump - issue counter dump for vinstr client
- * @cli:      pointer to vinstr client
- * @event_id: id of event that triggered hwcnt dump
- *
- * Return: zero on success
- */
-int kbase_vinstr_hwc_dump(
-		struct kbase_vinstr_client   *cli,
-		enum base_hwcnt_reader_event event_id);
-
-/**
- * kbase_vinstr_hwc_clear - performs a reset of the hardware counters for
- *                          a given kbase context
- * @cli: pointer to vinstr client
- *
- * Return: zero on success
- */
-int kbase_vinstr_hwc_clear(struct kbase_vinstr_client *cli);
-
-/**
- * kbase_vinstr_try_suspend - try suspending operation of a given vinstr context
- * @vinstr_ctx: vinstr context
- *
- * Return: 0 on success, or negative if state change is in progress
- *
- * Warning: This API call is non-generic. It is meant to be used only by
- *          job scheduler state machine.
- *
- * Function initiates vinstr switch to suspended state. Once it was called
- * vinstr enters suspending state. If function return non-zero value, it
- * indicates that state switch is not complete and function must be called
- * again. On state switch vinstr will trigger job scheduler state machine
- * cycle.
- */
-int kbase_vinstr_try_suspend(struct kbase_vinstr_context *vinstr_ctx);
-
-/**
- * kbase_vinstr_suspend - suspends operation of a given vinstr context
- * @vinstr_ctx: vinstr context
- *
- * Function initiates vinstr switch to suspended state. Then it blocks until
- * operation is completed.
- */
-void kbase_vinstr_suspend(struct kbase_vinstr_context *vinstr_ctx);
-
-/**
- * kbase_vinstr_wait_for_ready - waits for the vinstr context to get ready
- * @vinstr_ctx: vinstr context
- *
- * Function waits for the vinstr to become ready for dumping. It can be in the
- * resuming state after the client was attached but the client currently expects
- * that vinstr is ready for dumping immediately post attach.
- */
-void kbase_vinstr_wait_for_ready(struct kbase_vinstr_context *vinstr_ctx);
-
-/**
- * kbase_vinstr_resume - resumes operation of a given vinstr context
- * @vinstr_ctx: vinstr context
- *
- * Function can be called only if it was preceded by a successful call
+ * It is only valid to call this function once for each prior completed call
  * to kbase_vinstr_suspend.
  */
-void kbase_vinstr_resume(struct kbase_vinstr_context *vinstr_ctx);
+void kbase_vinstr_resume(struct kbase_vinstr_context *vctx);
 
 /**
- * kbase_vinstr_dump_size - Return required size of dump buffer
- * @kbdev: device pointer
+ * kbase_vinstr_hwcnt_reader_setup() - Set up a new hardware counter reader
+ *                                     client.
+ * @vinstr_ctx: Non-NULL pointer to the vinstr context.
+ * @setup:      Non-NULL pointer to the hwcnt reader configuration.
  *
- * Return : buffer size in bytes
+ * Return: file descriptor on success, else a (negative) error code.
  */
-size_t kbase_vinstr_dump_size(struct kbase_device *kbdev);
-
-/**
- * kbase_vinstr_detach_client - Detach a client from the vinstr core
- * @cli: pointer to vinstr client
- */
-void kbase_vinstr_detach_client(struct kbase_vinstr_client *cli);
-
-/**
- * kbase_vinstr_suspend_client - Suspend vinstr client
- * @client: pointer to vinstr client
- */
-void kbase_vinstr_suspend_client(struct kbase_vinstr_client *client);
-
-/**
- * kbase_vinstr_resume_client - Resume vinstr client
- * @client: pointer to vinstr client
- */
-void kbase_vinstr_resume_client(struct kbase_vinstr_client *client);
+int kbase_vinstr_hwcnt_reader_setup(
+	struct kbase_vinstr_context *vinstr_ctx,
+	struct kbase_ioctl_hwcnt_reader_setup *setup);
 
 #endif /* _KBASE_VINSTR_H_ */
-
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
index 920562e..6c6a8c6 100644
--- a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -154,7 +154,6 @@
 DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
 DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
 DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
-DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
 DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
 DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
 DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h
index 0741dfc..96296ac 100644
--- a/drivers/gpu/arm/midgard/mali_linux_trace.h
+++ b/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2016 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -32,159 +32,103 @@
 #define MALI_JOB_SLOTS_EVENT_CHANGED
 
 /**
- * mali_job_slots_event - called from mali_kbase_core_linux.c
- * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ * mali_job_slots_event - Reports change of job slot status.
+ * @gpu_id:   Kbase device id
+ * @event_id: ORed together bitfields representing a type of event,
+ *            made with the GATOR_MAKE_EVENT() macro.
+ * @tgid:     Thread group ID
+ * @pid:      Process ID
+ * @job_id:   Job ID
  */
 TRACE_EVENT(mali_job_slots_event,
-	TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
-			unsigned char job_id),
-	TP_ARGS(event_id, tgid, pid, job_id),
+	TP_PROTO(u32 gpu_id, u32 event_id, u32 tgid, u32 pid,
+		u8 job_id),
+	TP_ARGS(gpu_id, event_id, tgid, pid, job_id),
 	TP_STRUCT__entry(
-		__field(unsigned int, event_id)
-		__field(unsigned int, tgid)
-		__field(unsigned int, pid)
-		__field(unsigned char, job_id)
+		__field(u32, gpu_id)
+		__field(u32, event_id)
+		__field(u32, tgid)
+		__field(u32, pid)
+		__field(u8,  job_id)
 	),
 	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
 		__entry->event_id = event_id;
-		__entry->tgid = tgid;
-		__entry->pid = pid;
-		__entry->job_id = job_id;
+		__entry->tgid     = tgid;
+		__entry->pid      = pid;
+		__entry->job_id   = job_id;
 	),
-	TP_printk("event=%u tgid=%u pid=%u job_id=%u",
-		__entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+	TP_printk("gpu=%u event=%u tgid=%u pid=%u job_id=%u",
+		__entry->gpu_id, __entry->event_id,
+		__entry->tgid, __entry->pid, __entry->job_id)
 );
 
 /**
- * mali_pm_status - Called by mali_kbase_pm_driver.c
- * @event_id: core type (shader, tiler, l2 cache)
- * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ * mali_pm_status - Reports change of power management status.
+ * @gpu_id:   Kbase device id
+ * @event_id: Core type (shader, tiler, L2 cache)
+ * @value:    64-bit bitmask reporting the power status of
+ *            the cores (1 = ON, 0 = OFF)
  */
 TRACE_EVENT(mali_pm_status,
-	TP_PROTO(unsigned int event_id, unsigned long long value),
-	TP_ARGS(event_id, value),
+	TP_PROTO(u32 gpu_id, u32 event_id, u64 value),
+	TP_ARGS(gpu_id, event_id, value),
 	TP_STRUCT__entry(
-		__field(unsigned int, event_id)
-		__field(unsigned long long, value)
+		__field(u32, gpu_id)
+		__field(u32, event_id)
+		__field(u64, value)
 	),
 	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
 		__entry->event_id = event_id;
-		__entry->value = value;
+		__entry->value    = value;
 	),
-	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+	TP_printk("gpu=%u event %u = %llu",
+		__entry->gpu_id, __entry->event_id, __entry->value)
 );
 
 /**
- * mali_pm_power_on - Called by mali_kbase_pm_driver.c
- * @event_id: core type (shader, tiler, l2 cache)
- * @value: 64bits bitmask reporting the cores to power up
- */
-TRACE_EVENT(mali_pm_power_on,
-	TP_PROTO(unsigned int event_id, unsigned long long value),
-	TP_ARGS(event_id, value),
-	TP_STRUCT__entry(
-		__field(unsigned int, event_id)
-		__field(unsigned long long, value)
-	),
-	TP_fast_assign(
-		__entry->event_id = event_id;
-		__entry->value = value;
-	),
-	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
-);
-
-/**
- * mali_pm_power_off - Called by mali_kbase_pm_driver.c
- * @event_id: core type (shader, tiler, l2 cache)
- * @value: 64bits bitmask reporting the cores to power down
- */
-TRACE_EVENT(mali_pm_power_off,
-	TP_PROTO(unsigned int event_id, unsigned long long value),
-	TP_ARGS(event_id, value),
-	TP_STRUCT__entry(
-		__field(unsigned int, event_id)
-		__field(unsigned long long, value)
-	),
-	TP_fast_assign(
-		__entry->event_id = event_id;
-		__entry->value = value;
-	),
-	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
-);
-
-/**
- * mali_page_fault_insert_pages - Called by page_fault_worker()
- * it reports an MMU page fault resulting in new pages being mapped.
- * @event_id: MMU address space number.
- * @value: number of newly allocated pages
+ * mali_page_fault_insert_pages - Reports an MMU page fault
+ * resulting in new pages being mapped.
+ * @gpu_id:   Kbase device id
+ * @event_id: MMU address space number
+ * @value:    Number of newly allocated pages
  */
 TRACE_EVENT(mali_page_fault_insert_pages,
-	TP_PROTO(int event_id, unsigned long value),
-	TP_ARGS(event_id, value),
+	TP_PROTO(u32 gpu_id, s32 event_id, u64 value),
+	TP_ARGS(gpu_id, event_id, value),
 	TP_STRUCT__entry(
-		__field(int, event_id)
-		__field(unsigned long, value)
+		__field(u32, gpu_id)
+		__field(s32, event_id)
+		__field(u64, value)
 	),
 	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
 		__entry->event_id = event_id;
-		__entry->value = value;
+		__entry->value    = value;
 	),
-	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+	TP_printk("gpu=%u event %d = %llu",
+		__entry->gpu_id, __entry->event_id, __entry->value)
 );
 
 /**
- * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
- * it reports that a certain MMU address space is in use now.
- * @event_id: MMU address space number.
- */
-TRACE_EVENT(mali_mmu_as_in_use,
-	TP_PROTO(int event_id),
-	TP_ARGS(event_id),
-	TP_STRUCT__entry(
-		__field(int, event_id)
-	),
-	TP_fast_assign(
-		__entry->event_id = event_id;
-	),
-	TP_printk("event=%d", __entry->event_id)
-);
-
-/**
- * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
- * it reports that a certain MMU address space has been released now.
- * @event_id: MMU address space number.
- */
-TRACE_EVENT(mali_mmu_as_released,
-	TP_PROTO(int event_id),
-	TP_ARGS(event_id),
-	TP_STRUCT__entry(
-		__field(int, event_id)
-	),
-	TP_fast_assign(
-		__entry->event_id = event_id;
-	),
-	TP_printk("event=%d", __entry->event_id)
-);
-
-/**
- * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
- *                                 and by kbase_atomic_sub_pages()
- * it reports that the total number of allocated pages is changed.
- * @event_id: number of pages to be added or subtracted (according to the sign).
+ * mali_total_alloc_pages_change - Reports that the total number of
+ * allocated pages has changed.
+ * @gpu_id:   Kbase device id
+ * @event_id: Total number of pages allocated
  */
 TRACE_EVENT(mali_total_alloc_pages_change,
-	TP_PROTO(long long int event_id),
-	TP_ARGS(event_id),
+	TP_PROTO(u32 gpu_id, s64 event_id),
+	TP_ARGS(gpu_id, event_id),
 	TP_STRUCT__entry(
-		__field(long long int, event_id)
+		__field(u32, gpu_id)
+		__field(s64, event_id)
 	),
 	TP_fast_assign(
+		__entry->gpu_id   = gpu_id;
 		__entry->event_id = event_id;
 	),
-	TP_printk("event=%lld", __entry->event_id)
+	TP_printk("gpu=%u event=%lld", __entry->gpu_id, __entry->event_id)
 );
 
-#endif				/*  _TRACE_MALI_H */
+#endif /* _TRACE_MALI_H */
 
 #undef TRACE_INCLUDE_PATH
 #undef linux
diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h
index f17bd5e..3a4db10 100644
--- a/drivers/gpu/arm/midgard/mali_malisw.h
+++ b/drivers/gpu/arm/midgard/mali_malisw.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -83,15 +83,6 @@
 #define CSTD_NOP(...)	((void)#__VA_ARGS__)
 
 /**
- * Function-like macro for converting a pointer in to a u64 for storing into
- * an external data structure. This is commonly used when pairing a 32-bit
- * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
- * is complex and a straight cast does not work reliably as pointers are
- * often considered as signed.
- */
-#define PTR_TO_U64(x)	((uint64_t)((uintptr_t)(x)))
-
-/**
  * @hideinitializer
  * Function-like macro for stringizing a single level macro.
  * @code
@@ -115,22 +106,4 @@
  */
 #define CSTD_STR2(x)	CSTD_STR1(x)
 
-/**
- * Specify an assertion value which is evaluated at compile time. Recommended
- * usage is specification of a @c static @c INLINE function containing all of
- * the assertions thus:
- *
- * @code
- * static INLINE [module]_compile_time_assertions( void )
- * {
- *     COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
- * }
- * @endcode
- *
- * @note Use @c static not @c STATIC. We never want to turn off this @c static
- * specification for testing purposes.
- */
-#define CSTD_COMPILE_TIME_ASSERT(expr) \
-	do { switch (0) { case 0: case (expr):; } } while (false)
-
 #endif /* _MALISW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
index 8d9f7b6..f0ec391 100644
--- a/drivers/gpu/arm/midgard/mali_midg_regmap.h
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,274 +20,185 @@
  *
  */
 
-#ifndef _MIDGARD_REGMAP_H_
-#define _MIDGARD_REGMAP_H_
+#ifndef _MIDG_REGMAP_H_
+#define _MIDG_REGMAP_H_
 
 #include "mali_midg_coherency.h"
 #include "mali_kbase_gpu_id.h"
+#include "mali_midg_regmap_jm.h"
 
-/*
- * Begin Register Offsets
- */
+/* Begin Register Offsets */
+/* GPU control registers */
 
 #define GPU_CONTROL_BASE        0x0000
 #define GPU_CONTROL_REG(r)      (GPU_CONTROL_BASE + (r))
-#define GPU_ID                  0x000	/* (RO) GPU and revision identifier */
-#define L2_FEATURES             0x004	/* (RO) Level 2 cache features */
-#define CORE_FEATURES           0x008	/* (RO) Shader Core Features */
-#define TILER_FEATURES          0x00C	/* (RO) Tiler Features */
-#define MEM_FEATURES            0x010	/* (RO) Memory system features */
-#define MMU_FEATURES            0x014	/* (RO) MMU features */
-#define AS_PRESENT              0x018	/* (RO) Address space slots present */
-#define JS_PRESENT              0x01C	/* (RO) Job slots present */
-#define GPU_IRQ_RAWSTAT         0x020	/* (RW) */
-#define GPU_IRQ_CLEAR           0x024	/* (WO) */
-#define GPU_IRQ_MASK            0x028	/* (RW) */
-#define GPU_IRQ_STATUS          0x02C	/* (RO) */
+#define GPU_ID                  0x000   /* (RO) GPU and revision identifier */
+#define L2_FEATURES             0x004   /* (RO) Level 2 cache features */
+#define TILER_FEATURES          0x00C   /* (RO) Tiler Features */
+#define MEM_FEATURES            0x010   /* (RO) Memory system features */
+#define MMU_FEATURES            0x014   /* (RO) MMU features */
+#define AS_PRESENT              0x018   /* (RO) Address space slots present */
+#define GPU_IRQ_RAWSTAT         0x020   /* (RW) */
+#define GPU_IRQ_CLEAR           0x024   /* (WO) */
+#define GPU_IRQ_MASK            0x028   /* (RW) */
+#define GPU_IRQ_STATUS          0x02C   /* (RO) */
 
+#define GPU_COMMAND             0x030   /* (WO) */
+#define GPU_STATUS              0x034   /* (RO) */
 
-/* IRQ flags */
-#define GPU_FAULT               (1 << 0)	/* A GPU Fault has occurred */
-#define MULTIPLE_GPU_FAULTS     (1 << 7)	/* More than one GPU Fault occurred. */
-#define RESET_COMPLETED         (1 << 8)	/* Set when a reset has completed. Intended to use with SOFT_RESET
-						   commands which may take time. */
-#define POWER_CHANGED_SINGLE    (1 << 9)	/* Set when a single core has finished powering up or down. */
-#define POWER_CHANGED_ALL       (1 << 10)	/* Set when all cores have finished powering up or down
-						   and the power manager is idle. */
+#define GPU_DBGEN               (1 << 8)    /* DBGEN wire status */
 
-#define PRFCNT_SAMPLE_COMPLETED (1 << 16)	/* Set when a performance count sample has completed. */
-#define CLEAN_CACHES_COMPLETED  (1 << 17)	/* Set when a cache clean operation has completed. */
+#define GPU_FAULTSTATUS         0x03C   /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO     0x040   /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI     0x044   /* (RO) GPU exception fault address, high word */
 
-#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
-			| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+#define L2_CONFIG               0x048   /* (RW) Level 2 cache configuration */
 
-#define GPU_COMMAND             0x030	/* (WO) */
-#define GPU_STATUS              0x034	/* (RO) */
-#define LATEST_FLUSH            0x038	/* (RO) */
+#define PWR_KEY                 0x050   /* (WO) Power manager key register */
+#define PWR_OVERRIDE0           0x054   /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1           0x058   /* (RW) Power manager override settings */
 
-#define GROUPS_L2_COHERENT      (1 << 0)	/* Cores groups are l2 coherent */
-#define GPU_DBGEN               (1 << 8)	/* DBGEN wire status */
+#define PRFCNT_BASE_LO          0x060   /* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI          0x064   /* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG           0x068   /* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN            0x06C   /* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN        0x070   /* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN         0x074   /* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN        0x07C   /* (RW) Performance counter enable flags for MMU/L2 cache */
 
-#define GPU_FAULTSTATUS         0x03C	/* (RO) GPU exception type and fault status */
-#define GPU_FAULTADDRESS_LO     0x040	/* (RO) GPU exception fault address, low word */
-#define GPU_FAULTADDRESS_HI     0x044	/* (RO) GPU exception fault address, high word */
+#define CYCLE_COUNT_LO          0x090   /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI          0x094   /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO            0x098   /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI            0x09C   /* (RO) Global time stamp counter, high word */
 
-#define PWR_KEY                 0x050	/* (WO) Power manager key register */
-#define PWR_OVERRIDE0           0x054	/* (RW) Power manager override settings */
-#define PWR_OVERRIDE1           0x058	/* (RW) Power manager override settings */
+#define THREAD_MAX_THREADS      0x0A0   /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8   /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES         0x0AC   /* (RO) Thread features */
+#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that TLS must be allocated for */
 
-#define PRFCNT_BASE_LO          0x060	/* (RW) Performance counter memory region base address, low word */
-#define PRFCNT_BASE_HI          0x064	/* (RW) Performance counter memory region base address, high word */
-#define PRFCNT_CONFIG           0x068	/* (RW) Performance counter configuration */
-#define PRFCNT_JM_EN            0x06C	/* (RW) Performance counter enable flags for Job Manager */
-#define PRFCNT_SHADER_EN        0x070	/* (RW) Performance counter enable flags for shader cores */
-#define PRFCNT_TILER_EN         0x074	/* (RW) Performance counter enable flags for tiler */
-#define PRFCNT_MMU_L2_EN        0x07C	/* (RW) Performance counter enable flags for MMU/L2 cache */
-
-#define CYCLE_COUNT_LO          0x090	/* (RO) Cycle counter, low word */
-#define CYCLE_COUNT_HI          0x094	/* (RO) Cycle counter, high word */
-#define TIMESTAMP_LO            0x098	/* (RO) Global time stamp counter, low word */
-#define TIMESTAMP_HI            0x09C	/* (RO) Global time stamp counter, high word */
-
-#define THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
-#define THREAD_MAX_WORKGROUP_SIZE 0x0A4	/* (RO) Maximum workgroup size */
-#define THREAD_MAX_BARRIER_SIZE 0x0A8	/* (RO) Maximum threads waiting at a barrier */
-#define THREAD_FEATURES         0x0AC	/* (RO) Thread features */
-#define THREAD_TLS_ALLOC        0x310   /* (RO) Number of threads per core that
-					 * TLS must be allocated for
-					 */
-
-#define TEXTURE_FEATURES_0      0x0B0	/* (RO) Support flags for indexed texture formats 0..31 */
-#define TEXTURE_FEATURES_1      0x0B4	/* (RO) Support flags for indexed texture formats 32..63 */
-#define TEXTURE_FEATURES_2      0x0B8	/* (RO) Support flags for indexed texture formats 64..95 */
-#define TEXTURE_FEATURES_3      0x0BC	/* (RO) Support flags for texture order */
+#define TEXTURE_FEATURES_0      0x0B0   /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1      0x0B4   /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2      0x0B8   /* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3      0x0BC   /* (RO) Support flags for texture order */
 
 #define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
 
-#define JS0_FEATURES            0x0C0	/* (RO) Features of job slot 0 */
-#define JS1_FEATURES            0x0C4	/* (RO) Features of job slot 1 */
-#define JS2_FEATURES            0x0C8	/* (RO) Features of job slot 2 */
-#define JS3_FEATURES            0x0CC	/* (RO) Features of job slot 3 */
-#define JS4_FEATURES            0x0D0	/* (RO) Features of job slot 4 */
-#define JS5_FEATURES            0x0D4	/* (RO) Features of job slot 5 */
-#define JS6_FEATURES            0x0D8	/* (RO) Features of job slot 6 */
-#define JS7_FEATURES            0x0DC	/* (RO) Features of job slot 7 */
-#define JS8_FEATURES            0x0E0	/* (RO) Features of job slot 8 */
-#define JS9_FEATURES            0x0E4	/* (RO) Features of job slot 9 */
-#define JS10_FEATURES           0x0E8	/* (RO) Features of job slot 10 */
-#define JS11_FEATURES           0x0EC	/* (RO) Features of job slot 11 */
-#define JS12_FEATURES           0x0F0	/* (RO) Features of job slot 12 */
-#define JS13_FEATURES           0x0F4	/* (RO) Features of job slot 13 */
-#define JS14_FEATURES           0x0F8	/* (RO) Features of job slot 14 */
-#define JS15_FEATURES           0x0FC	/* (RO) Features of job slot 15 */
+#define SHADER_PRESENT_LO       0x100   /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI       0x104   /* (RO) Shader core present bitmap, high word */
 
-#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+#define TILER_PRESENT_LO        0x110   /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI        0x114   /* (RO) Tiler core present bitmap, high word */
 
-#define SHADER_PRESENT_LO       0x100	/* (RO) Shader core present bitmap, low word */
-#define SHADER_PRESENT_HI       0x104	/* (RO) Shader core present bitmap, high word */
-
-#define TILER_PRESENT_LO        0x110	/* (RO) Tiler core present bitmap, low word */
-#define TILER_PRESENT_HI        0x114	/* (RO) Tiler core present bitmap, high word */
-
-#define L2_PRESENT_LO           0x120	/* (RO) Level 2 cache present bitmap, low word */
-#define L2_PRESENT_HI           0x124	/* (RO) Level 2 cache present bitmap, high word */
+#define L2_PRESENT_LO           0x120   /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI           0x124   /* (RO) Level 2 cache present bitmap, high word */
 
 #define STACK_PRESENT_LO        0xE00   /* (RO) Core stack present bitmap, low word */
 #define STACK_PRESENT_HI        0xE04   /* (RO) Core stack present bitmap, high word */
 
+#define SHADER_READY_LO         0x140   /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI         0x144   /* (RO) Shader core ready bitmap, high word */
 
-#define SHADER_READY_LO         0x140	/* (RO) Shader core ready bitmap, low word */
-#define SHADER_READY_HI         0x144	/* (RO) Shader core ready bitmap, high word */
+#define TILER_READY_LO          0x150   /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI          0x154   /* (RO) Tiler core ready bitmap, high word */
 
-#define TILER_READY_LO          0x150	/* (RO) Tiler core ready bitmap, low word */
-#define TILER_READY_HI          0x154	/* (RO) Tiler core ready bitmap, high word */
-
-#define L2_READY_LO             0x160	/* (RO) Level 2 cache ready bitmap, low word */
-#define L2_READY_HI             0x164	/* (RO) Level 2 cache ready bitmap, high word */
+#define L2_READY_LO             0x160   /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI             0x164   /* (RO) Level 2 cache ready bitmap, high word */
 
 #define STACK_READY_LO          0xE10   /* (RO) Core stack ready bitmap, low word */
 #define STACK_READY_HI          0xE14   /* (RO) Core stack ready bitmap, high word */
 
+#define SHADER_PWRON_LO         0x180   /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI         0x184   /* (WO) Shader core power on bitmap, high word */
 
-#define SHADER_PWRON_LO         0x180	/* (WO) Shader core power on bitmap, low word */
-#define SHADER_PWRON_HI         0x184	/* (WO) Shader core power on bitmap, high word */
+#define TILER_PWRON_LO          0x190   /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI          0x194   /* (WO) Tiler core power on bitmap, high word */
 
-#define TILER_PWRON_LO          0x190	/* (WO) Tiler core power on bitmap, low word */
-#define TILER_PWRON_HI          0x194	/* (WO) Tiler core power on bitmap, high word */
-
-#define L2_PWRON_LO             0x1A0	/* (WO) Level 2 cache power on bitmap, low word */
-#define L2_PWRON_HI             0x1A4	/* (WO) Level 2 cache power on bitmap, high word */
+#define L2_PWRON_LO             0x1A0   /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI             0x1A4   /* (WO) Level 2 cache power on bitmap, high word */
 
 #define STACK_PWRON_LO          0xE20   /* (RO) Core stack power on bitmap, low word */
 #define STACK_PWRON_HI          0xE24   /* (RO) Core stack power on bitmap, high word */
 
+#define SHADER_PWROFF_LO        0x1C0   /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI        0x1C4   /* (WO) Shader core power off bitmap, high word */
 
-#define SHADER_PWROFF_LO        0x1C0	/* (WO) Shader core power off bitmap, low word */
-#define SHADER_PWROFF_HI        0x1C4	/* (WO) Shader core power off bitmap, high word */
+#define TILER_PWROFF_LO         0x1D0   /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI         0x1D4   /* (WO) Tiler core power off bitmap, high word */
 
-#define TILER_PWROFF_LO         0x1D0	/* (WO) Tiler core power off bitmap, low word */
-#define TILER_PWROFF_HI         0x1D4	/* (WO) Tiler core power off bitmap, high word */
-
-#define L2_PWROFF_LO            0x1E0	/* (WO) Level 2 cache power off bitmap, low word */
-#define L2_PWROFF_HI            0x1E4	/* (WO) Level 2 cache power off bitmap, high word */
+#define L2_PWROFF_LO            0x1E0   /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI            0x1E4   /* (WO) Level 2 cache power off bitmap, high word */
 
 #define STACK_PWROFF_LO         0xE30   /* (RO) Core stack power off bitmap, low word */
 #define STACK_PWROFF_HI         0xE34   /* (RO) Core stack power off bitmap, high word */
 
+#define SHADER_PWRTRANS_LO      0x200   /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI      0x204   /* (RO) Shader core power transition bitmap, high word */
 
-#define SHADER_PWRTRANS_LO      0x200	/* (RO) Shader core power transition bitmap, low word */
-#define SHADER_PWRTRANS_HI      0x204	/* (RO) Shader core power transition bitmap, high word */
+#define TILER_PWRTRANS_LO       0x210   /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI       0x214   /* (RO) Tiler core power transition bitmap, high word */
 
-#define TILER_PWRTRANS_LO       0x210	/* (RO) Tiler core power transition bitmap, low word */
-#define TILER_PWRTRANS_HI       0x214	/* (RO) Tiler core power transition bitmap, high word */
-
-#define L2_PWRTRANS_LO          0x220	/* (RO) Level 2 cache power transition bitmap, low word */
-#define L2_PWRTRANS_HI          0x224	/* (RO) Level 2 cache power transition bitmap, high word */
+#define L2_PWRTRANS_LO          0x220   /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI          0x224   /* (RO) Level 2 cache power transition bitmap, high word */
 
 #define STACK_PWRTRANS_LO       0xE40   /* (RO) Core stack power transition bitmap, low word */
 #define STACK_PWRTRANS_HI       0xE44   /* (RO) Core stack power transition bitmap, high word */
 
+#define SHADER_PWRACTIVE_LO     0x240   /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI     0x244   /* (RO) Shader core active bitmap, high word */
 
-#define SHADER_PWRACTIVE_LO     0x240	/* (RO) Shader core active bitmap, low word */
-#define SHADER_PWRACTIVE_HI     0x244	/* (RO) Shader core active bitmap, high word */
+#define TILER_PWRACTIVE_LO      0x250   /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI      0x254   /* (RO) Tiler core active bitmap, high word */
 
-#define TILER_PWRACTIVE_LO      0x250	/* (RO) Tiler core active bitmap, low word */
-#define TILER_PWRACTIVE_HI      0x254	/* (RO) Tiler core active bitmap, high word */
+#define L2_PWRACTIVE_LO         0x260   /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI         0x264   /* (RO) Level 2 cache active bitmap, high word */
 
-#define L2_PWRACTIVE_LO         0x260	/* (RO) Level 2 cache active bitmap, low word */
-#define L2_PWRACTIVE_HI         0x264	/* (RO) Level 2 cache active bitmap, high word */
+#define COHERENCY_FEATURES      0x300   /* (RO) Coherency features present */
+#define COHERENCY_ENABLE        0x304   /* (RW) Coherency enable */
 
-#define COHERENCY_FEATURES      0x300	/* (RO) Coherency features present */
-#define COHERENCY_ENABLE        0x304	/* (RW) Coherency enable */
+#define SHADER_CONFIG           0xF04   /* (RW) Shader core configuration (implementation-specific) */
+#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration (implementation-specific) */
+#define L2_MMU_CONFIG           0xF0C   /* (RW) L2 cache and MMU configuration (implementation-specific) */
 
-#define JM_CONFIG               0xF00   /* (RW) Job Manager configuration register (Implementation specific register) */
-#define SHADER_CONFIG           0xF04	/* (RW) Shader core configuration settings (Implementation specific register) */
-#define TILER_CONFIG            0xF08   /* (RW) Tiler core configuration settings (Implementation specific register) */
-#define L2_MMU_CONFIG           0xF0C	/* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+/* Job control registers */
 
 #define JOB_CONTROL_BASE        0x1000
 
 #define JOB_CONTROL_REG(r)      (JOB_CONTROL_BASE + (r))
 
-#define JOB_IRQ_RAWSTAT         0x000	/* Raw interrupt status register */
-#define JOB_IRQ_CLEAR           0x004	/* Interrupt clear register */
-#define JOB_IRQ_MASK            0x008	/* Interrupt mask register */
-#define JOB_IRQ_STATUS          0x00C	/* Interrupt status register */
-#define JOB_IRQ_JS_STATE        0x010	/* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
-#define JOB_IRQ_THROTTLE        0x014	/* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+#define JOB_IRQ_RAWSTAT         0x000   /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR           0x004   /* Interrupt clear register */
+#define JOB_IRQ_MASK            0x008   /* Interrupt mask register */
+#define JOB_IRQ_STATUS          0x00C   /* Interrupt status register */
 
-/* JOB IRQ flags */
-#define JOB_IRQ_GLOBAL_IF       (1 << 18)   /* Global interface interrupt received */
-
-#define JOB_SLOT0               0x800	/* Configuration registers for job slot 0 */
-#define JOB_SLOT1               0x880	/* Configuration registers for job slot 1 */
-#define JOB_SLOT2               0x900	/* Configuration registers for job slot 2 */
-#define JOB_SLOT3               0x980	/* Configuration registers for job slot 3 */
-#define JOB_SLOT4               0xA00	/* Configuration registers for job slot 4 */
-#define JOB_SLOT5               0xA80	/* Configuration registers for job slot 5 */
-#define JOB_SLOT6               0xB00	/* Configuration registers for job slot 6 */
-#define JOB_SLOT7               0xB80	/* Configuration registers for job slot 7 */
-#define JOB_SLOT8               0xC00	/* Configuration registers for job slot 8 */
-#define JOB_SLOT9               0xC80	/* Configuration registers for job slot 9 */
-#define JOB_SLOT10              0xD00	/* Configuration registers for job slot 10 */
-#define JOB_SLOT11              0xD80	/* Configuration registers for job slot 11 */
-#define JOB_SLOT12              0xE00	/* Configuration registers for job slot 12 */
-#define JOB_SLOT13              0xE80	/* Configuration registers for job slot 13 */
-#define JOB_SLOT14              0xF00	/* Configuration registers for job slot 14 */
-#define JOB_SLOT15              0xF80	/* Configuration registers for job slot 15 */
-
-#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
-
-#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
-#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
-#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
-#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
-#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
-#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
-#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
-#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
-					   slot n */
-
-#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
-#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
-
-#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
-#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
-
-#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
-#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
-#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
-#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
-					   job slot n */
-
-#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
-
-#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+/* MMU control registers */
 
 #define MEMORY_MANAGEMENT_BASE  0x2000
 #define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
 
-#define MMU_IRQ_RAWSTAT         0x000	/* (RW) Raw interrupt status register */
-#define MMU_IRQ_CLEAR           0x004	/* (WO) Interrupt clear register */
-#define MMU_IRQ_MASK            0x008	/* (RW) Interrupt mask register */
-#define MMU_IRQ_STATUS          0x00C	/* (RO) Interrupt status register */
+#define MMU_IRQ_RAWSTAT         0x000   /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR           0x004   /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK            0x008   /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS          0x00C   /* (RO) Interrupt status register */
 
-#define MMU_AS0                 0x400	/* Configuration registers for address space 0 */
-#define MMU_AS1                 0x440	/* Configuration registers for address space 1 */
-#define MMU_AS2                 0x480	/* Configuration registers for address space 2 */
-#define MMU_AS3                 0x4C0	/* Configuration registers for address space 3 */
-#define MMU_AS4                 0x500	/* Configuration registers for address space 4 */
-#define MMU_AS5                 0x540	/* Configuration registers for address space 5 */
-#define MMU_AS6                 0x580	/* Configuration registers for address space 6 */
-#define MMU_AS7                 0x5C0	/* Configuration registers for address space 7 */
-#define MMU_AS8                 0x600	/* Configuration registers for address space 8 */
-#define MMU_AS9                 0x640	/* Configuration registers for address space 9 */
-#define MMU_AS10                0x680	/* Configuration registers for address space 10 */
-#define MMU_AS11                0x6C0	/* Configuration registers for address space 11 */
-#define MMU_AS12                0x700	/* Configuration registers for address space 12 */
-#define MMU_AS13                0x740	/* Configuration registers for address space 13 */
-#define MMU_AS14                0x780	/* Configuration registers for address space 14 */
-#define MMU_AS15                0x7C0	/* Configuration registers for address space 15 */
+#define MMU_AS0                 0x400   /* Configuration registers for address space 0 */
+#define MMU_AS1                 0x440   /* Configuration registers for address space 1 */
+#define MMU_AS2                 0x480   /* Configuration registers for address space 2 */
+#define MMU_AS3                 0x4C0   /* Configuration registers for address space 3 */
+#define MMU_AS4                 0x500   /* Configuration registers for address space 4 */
+#define MMU_AS5                 0x540   /* Configuration registers for address space 5 */
+#define MMU_AS6                 0x580   /* Configuration registers for address space 6 */
+#define MMU_AS7                 0x5C0   /* Configuration registers for address space 7 */
+#define MMU_AS8                 0x600   /* Configuration registers for address space 8 */
+#define MMU_AS9                 0x640   /* Configuration registers for address space 9 */
+#define MMU_AS10                0x680   /* Configuration registers for address space 10 */
+#define MMU_AS11                0x6C0   /* Configuration registers for address space 11 */
+#define MMU_AS12                0x700   /* Configuration registers for address space 12 */
+#define MMU_AS13                0x740   /* Configuration registers for address space 13 */
+#define MMU_AS14                0x780   /* Configuration registers for address space 14 */
+#define MMU_AS15                0x7C0   /* Configuration registers for address space 15 */
+
+/* MMU address space control registers */
 
 #define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
 
@@ -303,7 +214,6 @@
 #define AS_FAULTADDRESS_HI     0x24	/* (RO) Fault Address for address space n, high word */
 #define AS_STATUS              0x28	/* (RO) Status flags for address space n */
 
-
 /* (RW) Translation table configuration for address space n, low word */
 #define AS_TRANSCFG_LO         0x30
 /* (RW) Translation table configuration for address space n, high word */
@@ -315,17 +225,30 @@
 
 /* End Register Offsets */
 
+/* IRQ flags */
+#define GPU_FAULT               (1 << 0)    /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS     (1 << 7)    /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED         (1 << 8)    /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE    (1 << 9)    /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL       (1 << 10)   /* Set when all cores have finished powering up or down. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16)   /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED  (1 << 17)   /* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+		| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
 /*
  * MMU_IRQ_RAWSTAT register values. Values are valid also for
-   MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
  */
 
-#define MMU_PAGE_FAULT_FLAGS   16
+#define MMU_PAGE_FAULT_FLAGS    16
 
 /* Macros returning a bitmask to retrieve page fault or bus error flags from
  * MMU registers */
-#define MMU_PAGE_FAULT(n)      (1UL << (n))
-#define MMU_BUS_ERROR(n)       (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+#define MMU_PAGE_FAULT(n)       (1UL << (n))
+#define MMU_BUS_ERROR(n)        (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
 
 /*
  * Begin LPAE MMU TRANSTAB register values
@@ -350,25 +273,23 @@
  */
 #define AS_STATUS_AS_ACTIVE 0x01
 
-#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                    (0x7<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT       (0x0<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT        (0x1<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT      (0x2<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG             (0x3<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK                      (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT         (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT          (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT        (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG               (0x3<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT        (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT   (0x5<<3)
 
-#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT      (0x4<<3)
-#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
-
-#define AS_FAULTSTATUS_ACCESS_TYPE_MASK                  (0x3<<8)
-#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC                (0x0<<8)
-#define AS_FAULTSTATUS_ACCESS_TYPE_EX                    (0x1<<8)
-#define AS_FAULTSTATUS_ACCESS_TYPE_READ                  (0x2<<8)
-#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE                 (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK         (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC       (0x0<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX           (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ         (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE        (0x3<<8)
 
 /*
  * Begin MMU TRANSCFG register values
  */
-
 #define AS_TRANSCFG_ADRMODE_LEGACY      0
 #define AS_TRANSCFG_ADRMODE_UNMAPPED    1
 #define AS_TRANSCFG_ADRMODE_IDENTITY    2
@@ -377,34 +298,22 @@
 
 #define AS_TRANSCFG_ADRMODE_MASK        0xF
 
-
 /*
  * Begin TRANSCFG register values
  */
-#define AS_TRANSCFG_PTW_MEMATTR_MASK (3 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1 << 24)
-#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2 << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
 
-#define AS_TRANSCFG_PTW_SH_MASK ((3 << 28))
-#define AS_TRANSCFG_PTW_SH_OS (2 << 28)
-#define AS_TRANSCFG_PTW_SH_IS (3 << 28)
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
 
 /*
  * Begin Command Values
  */
 
-/* JS_COMMAND register commands */
-#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
-#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
-#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
-#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
-#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
-#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
-#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
-#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
-
-#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
-
 /* AS_COMMAND register commands */
 #define AS_COMMAND_NOP         0x00	/* NOP Operation */
 #define AS_COMMAND_UPDATE      0x01	/* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
@@ -416,90 +325,25 @@
 #define AS_COMMAND_FLUSH_MEM   0x05	/* Wait for memory accesses to complete, flush all the L1s cache then
 					   flush all L2 caches then issue a flush region command to all MMUs */
 
-/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
-#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
-#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
-#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
-#define JS_CONFIG_START_MMU                    (1u << 10)
-#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
-#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
-#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
-#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
-#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
-#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
-#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
-
-/* JS_XAFFINITY register values */
-#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
-#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
-#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
-
-/* JS_STATUS register values */
-
-/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
- * The values are separated to avoid dependency of userspace and kernel code.
- */
-
-/* Group of values representing the job status insead a particular fault */
-#define JS_STATUS_NO_EXCEPTION_BASE   0x00
-#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
-#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
-#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
-
-/* General fault values */
-#define JS_STATUS_FAULT_BASE          0x40
-#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
-#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
-#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
-#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
-#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
-#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
-
-/* Instruction or data faults */
-#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
-#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
-#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
-#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
-#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
-#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
-#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
-#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
-/* NOTE: No fault with 0x57 code defined in spec. */
-#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
-#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
-#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
-
-/* Other faults */
-#define JS_STATUS_MEMORY_FAULT_BASE   0x60
-#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
-#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
-
-/* GPU_COMMAND values */
-#define GPU_COMMAND_NOP                0x00	/* No operation, nothing happens */
-#define GPU_COMMAND_SOFT_RESET         0x01	/* Stop all external bus interfaces, and then reset the entire GPU. */
-#define GPU_COMMAND_HARD_RESET         0x02	/* Immediately reset the entire GPU. */
-#define GPU_COMMAND_PRFCNT_CLEAR       0x03	/* Clear all performance counters, setting them all to zero. */
-#define GPU_COMMAND_PRFCNT_SAMPLE      0x04	/* Sample all performance counters, writing them out to memory */
-#define GPU_COMMAND_CYCLE_COUNT_START  0x05	/* Starts the cycle counter, and system timestamp propagation */
-#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06	/* Stops the cycle counter, and system timestamp propagation */
-#define GPU_COMMAND_CLEAN_CACHES       0x07	/* Clean all caches */
-#define GPU_COMMAND_CLEAN_INV_CACHES   0x08	/* Clean and invalidate all caches */
-#define GPU_COMMAND_SET_PROTECTED_MODE 0x09	/* Places the GPU in protected mode */
-
-/* End Command Values */
-
 /* GPU_STATUS values */
-#define GPU_STATUS_PRFCNT_ACTIVE           (1 << 2)	/* Set if the performance counters are active. */
-#define GPU_STATUS_PROTECTED_MODE_ACTIVE   (1 << 7)	/* Set if protected mode is active */
+#define GPU_STATUS_PRFCNT_ACTIVE            (1 << 2)    /* Set if the performance counters are active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE    (1 << 7)    /* Set if protected mode is active */
 
 /* PRFCNT_CONFIG register values */
-#define PRFCNT_CONFIG_MODE_SHIFT      0 /* Counter mode position. */
-#define PRFCNT_CONFIG_AS_SHIFT        4 /* Address space bitmap position. */
-#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+#define PRFCNT_CONFIG_MODE_SHIFT        0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT          4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT   8 /* Set select position. */
 
-#define PRFCNT_CONFIG_MODE_OFF    0	/* The performance counters are disabled. */
-#define PRFCNT_CONFIG_MODE_MANUAL 1	/* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
-#define PRFCNT_CONFIG_MODE_TILE   2	/* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_OFF          0
+/* The performance counters are enabled, but are only written out when a
+ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
+ */
+#define PRFCNT_CONFIG_MODE_MANUAL       1
+/* The performance counters are enabled, and are written out each time a tile
+ * finishes rendering.
+ */
+#define PRFCNT_CONFIG_MODE_TILE         2
 
 /* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
 /* Use GPU implementation-defined caching policy. */
@@ -554,40 +398,9 @@
 /* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
 #define AS_MEMATTR_INDEX_NON_CACHEABLE         5
 
-/* JS<n>_FEATURES register */
-
-#define JS_FEATURE_NULL_JOB              (1u << 1)
-#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
-#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
-#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
-#define JS_FEATURE_VERTEX_JOB            (1u << 5)
-#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
-#define JS_FEATURE_TILER_JOB             (1u << 7)
-#define JS_FEATURE_FUSED_JOB             (1u << 8)
-#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
-
-/* End JS<n>_FEATURES register */
-
 /* L2_MMU_CONFIG register */
 #define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT       (23)
 #define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY             (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT        (24)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS              (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT       (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER      (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF         (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
-
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT       (26)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES             (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT      (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER     (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
-#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF        (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
-
-#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS_SHIFT      (12)
-#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS            (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
-
-#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES_SHIFT     (15)
-#define L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES           (0x7 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
 
 /* End L2_MMU_CONFIG register */
 
@@ -610,7 +423,6 @@
 /* End THREAD_* registers */
 
 /* SHADER_CONFIG register */
-
 #define SC_ALT_COUNTERS             (1ul << 3)
 #define SC_OVERRIDE_FWD_PIXEL_KILL  (1ul << 4)
 #define SC_SDC_DISABLE_OQ_DISCARD   (1ul << 6)
@@ -619,25 +431,19 @@
 #define SC_TLS_HASH_ENABLE          (1ul << 17)
 #define SC_LS_ATTR_CHECK_DISABLE    (1ul << 18)
 #define SC_ENABLE_TEXGRD_FLAGS      (1ul << 25)
+#define SC_VAR_ALGORITHM            (1ul << 29)
 /* End SHADER_CONFIG register */
 
 /* TILER_CONFIG register */
-
 #define TC_CLOCK_GATE_OVERRIDE      (1ul << 0)
-
 /* End TILER_CONFIG register */
 
-/* JM_CONFIG register */
-
-#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
-#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
-#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
-#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
-#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
-#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
-#define JM_IDVS_GROUP_SIZE_SHIFT (16)
-#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
-/* End JM_CONFIG register */
+/* L2_CONFIG register */
+#define L2_CONFIG_SIZE_SHIFT        16
+#define L2_CONFIG_SIZE_MASK         (0xFFul << L2_CONFIG_SIZE_SHIFT)
+#define L2_CONFIG_HASH_SHIFT        24
+#define L2_CONFIG_HASH_MASK         (0xFFul << L2_CONFIG_HASH_SHIFT)
+/* End L2_CONFIG register */
 
 
-#endif /* _MIDGARD_REGMAP_H_ */
+#endif /* _MIDG_REGMAP_H_ */
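
For reference, the per-address-space registers kept in this header are reached by composing MMU_REG() and MMU_AS_REG(). A minimal standalone sketch, not part of the patch (macros copied from the definitions above; the printout only checks the arithmetic):

#include <stdio.h>

/* Copied from the regmap header above, illustration only. */
#define MEMORY_MANAGEMENT_BASE  0x2000
#define MMU_REG(r)              (MEMORY_MANAGEMENT_BASE + (r))
#define MMU_AS0                 0x400   /* Configuration registers for address space 0 */
#define MMU_AS_REG(n, r)        (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
#define AS_STATUS               0x28    /* (RO) Status flags for address space n */

int main(void)
{
	/* Address space 3, AS_STATUS: 0x2000 + 0x400 + (3 << 6) + 0x28 = 0x24E8 */
	printf("AS3 STATUS offset: 0x%x\n", MMU_AS_REG(3, AS_STATUS));
	return 0;
}
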
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap_jm.h b/drivers/gpu/arm/midgard/mali_midg_regmap_jm.h
new file mode 100644
index 0000000..58e4d08
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap_jm.h
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MIDG_REGMAP_JM_H_
+#define _MIDG_REGMAP_JM_H_
+
+/* GPU control registers */
+
+#define CORE_FEATURES           0x008   /* (RO) Shader Core Features */
+#define JS_PRESENT              0x01C   /* (RO) Job slots present */
+#define LATEST_FLUSH            0x038   /* (RO) Flush ID of latest clean-and-invalidate operation */
+#define GROUPS_L2_COHERENT      (1 << 0)    /* Core groups are L2 coherent */
+
+#define JS0_FEATURES            0x0C0   /* (RO) Features of job slot 0 */
+#define JS1_FEATURES            0x0C4   /* (RO) Features of job slot 1 */
+#define JS2_FEATURES            0x0C8   /* (RO) Features of job slot 2 */
+#define JS3_FEATURES            0x0CC   /* (RO) Features of job slot 3 */
+#define JS4_FEATURES            0x0D0   /* (RO) Features of job slot 4 */
+#define JS5_FEATURES            0x0D4   /* (RO) Features of job slot 5 */
+#define JS6_FEATURES            0x0D8   /* (RO) Features of job slot 6 */
+#define JS7_FEATURES            0x0DC   /* (RO) Features of job slot 7 */
+#define JS8_FEATURES            0x0E0   /* (RO) Features of job slot 8 */
+#define JS9_FEATURES            0x0E4   /* (RO) Features of job slot 9 */
+#define JS10_FEATURES           0x0E8   /* (RO) Features of job slot 10 */
+#define JS11_FEATURES           0x0EC   /* (RO) Features of job slot 11 */
+#define JS12_FEATURES           0x0F0   /* (RO) Features of job slot 12 */
+#define JS13_FEATURES           0x0F4   /* (RO) Features of job slot 13 */
+#define JS14_FEATURES           0x0F8   /* (RO) Features of job slot 14 */
+#define JS15_FEATURES           0x0FC   /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n)      GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define JM_CONFIG               0xF00   /* (RW) Job manager configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_IRQ_JS_STATE        0x010   /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE        0x014   /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt.  */
+
+#define JOB_SLOT0               0x800   /* Configuration registers for job slot 0 */
+#define JOB_SLOT1               0x880   /* Configuration registers for job slot 1 */
+#define JOB_SLOT2               0x900   /* Configuration registers for job slot 2 */
+#define JOB_SLOT3               0x980   /* Configuration registers for job slot 3 */
+#define JOB_SLOT4               0xA00   /* Configuration registers for job slot 4 */
+#define JOB_SLOT5               0xA80   /* Configuration registers for job slot 5 */
+#define JOB_SLOT6               0xB00   /* Configuration registers for job slot 6 */
+#define JOB_SLOT7               0xB80   /* Configuration registers for job slot 7 */
+#define JOB_SLOT8               0xC00   /* Configuration registers for job slot 8 */
+#define JOB_SLOT9               0xC80   /* Configuration registers for job slot 9 */
+#define JOB_SLOT10              0xD00   /* Configuration registers for job slot 10 */
+#define JOB_SLOT11              0xD80   /* Configuration registers for job slot 11 */
+#define JOB_SLOT12              0xE00   /* Configuration registers for job slot 12 */
+#define JOB_SLOT13              0xE80   /* Configuration registers for job slot 13 */
+#define JOB_SLOT14              0xF00   /* Configuration registers for job slot 14 */
+#define JOB_SLOT15              0xF80   /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r)      (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO             0x00	/* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI             0x04	/* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO             0x08	/* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI             0x0C	/* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO         0x10	/* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI         0x14	/* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG              0x18	/* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY           0x1C	/* (RO) Extended affinity mask for job
+					   slot n */
+
+#define JS_COMMAND             0x20	/* (WO) Command register for job slot n */
+#define JS_STATUS              0x24	/* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO        0x40	/* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI        0x44	/* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO    0x50	/* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI    0x54	/* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT         0x58	/* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT      0x5C	/* (RW) Next extended affinity mask for
+					   job slot n */
+
+#define JS_COMMAND_NEXT        0x60	/* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT       0x70	/* (RW) Next job slot n cache flush ID */
+
+/* No JM-specific MMU control registers */
+/* No JM-specific MMU address space control registers */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP         0x00	/* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START       0x01	/* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP   0x03	/* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07	/* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK        0x07    /* Mask of bits currently in use by the HW */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION        (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN            (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU                    (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG               (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION          JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN              (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE   (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION       (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK     (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n)                ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE     (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE     (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid a dependency between userspace and kernel code.
+ */
+
+/* Group of values representing the job status instead of a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE   0x00
+#define JS_STATUS_INTERRUPTED         (JS_STATUS_NO_EXCEPTION_BASE + 0x02)	/* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED             (JS_STATUS_NO_EXCEPTION_BASE + 0x03)	/* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED          (JS_STATUS_NO_EXCEPTION_BASE + 0x04)	/* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE          0x40
+#define JS_STATUS_CONFIG_FAULT        (JS_STATUS_FAULT_BASE)	/* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT         (JS_STATUS_FAULT_BASE + 0x01)	/* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT          (JS_STATUS_FAULT_BASE + 0x02)	/* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT         (JS_STATUS_FAULT_BASE + 0x03)	/* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT      (JS_STATUS_FAULT_BASE + 0x04)	/* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT           (JS_STATUS_FAULT_BASE + 0x08)	/* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE  0x50
+#define JS_STATUS_INSTR_INVALID_PC        (JS_STATUS_INSTRUCTION_FAULT_BASE)	/* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01)	/* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02)	/* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03)	/* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT         (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04)	/* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05)	/* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT       (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06)	/* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT      (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08)	/* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT        (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09)	/* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT     (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A)	/* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE   0x60
+#define JS_STATUS_OUT_OF_MEMORY       (JS_STATUS_MEMORY_FAULT_BASE)	/* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN             0x7F	/* 0x7F means UNKNOWN */
+
+/* JS<n>_FEATURES register */
+#define JS_FEATURE_NULL_JOB              (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB         (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB       (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB           (1u << 4)
+#define JS_FEATURE_VERTEX_JOB            (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB          (1u << 6)
+#define JS_FEATURE_TILER_JOB             (1u << 7)
+#define JS_FEATURE_FUSED_JOB             (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB          (1u << 9)
+
+/* JM_CONFIG register */
+#define JM_TIMESTAMP_OVERRIDE  (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+#define JM_IDVS_GROUP_SIZE_SHIFT (16)
+#define JM_MAX_IDVS_GROUP_SIZE (0x3F)
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP                0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET         0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET         0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR       0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE      0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START  0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP   0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES       0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES   0x08 /* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
+
+#endif /* _MIDG_REGMAP_JM_H_ */
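
The flag-qualified stop commands in this new JM header pair with JS_CONFIG_JOB_CHAIN_FLAG: the _0 variants act only on job chains submitted with the flag clear, the _1 variants only on those submitted with it set. A hedged sketch of how such a command might be chosen (pick_soft_stop() is a hypothetical helper, values copied from the definitions above):

/* Sketch only, not part of the patch. */
#define JS_COMMAND_SOFT_STOP   0x02	/* Gently stop processing a job chain */
#define JS_COMMAND_SOFT_STOP_0 0x04	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
#define JS_COMMAND_SOFT_STOP_1 0x06	/* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */

static unsigned int pick_soft_stop(int chain_flag_was_set)
{
	/* Target only job chains submitted with the matching flag value;
	 * plain JS_COMMAND_SOFT_STOP would stop the slot unconditionally.
	 */
	return chain_flag_was_set ? JS_COMMAND_SOFT_STOP_1
				  : JS_COMMAND_SOFT_STOP_0;
}
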
diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h
index c81f404..701f390 100644
--- a/drivers/gpu/arm/midgard/mali_uk.h
+++ b/drivers/gpu/arm/midgard/mali_uk.h
@@ -74,68 +74,6 @@
 	UK_CLIENT_COUNT
 };
 
-/**
- * Each function callable through the UK interface has a unique number.
- * Functions provided by UK clients start from number UK_FUNC_ID.
- * Numbers below UK_FUNC_ID are used for internal UK functions.
- */
-enum uk_func {
-	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
-	/**
-	 * Each UK client numbers the functions they provide starting from
-	 * number UK_FUNC_ID. This number is then eventually assigned to the
-	 * id field of the union uk_header structure when preparing to make a
-	 * UK call. See your UK client for a list of their function numbers.
-	 */
-	UK_FUNC_ID = 512
-};
-
-/**
- * Arguments for a UK call are stored in a structure. This structure consists
- * of a fixed size header and a payload. The header carries a 32-bit number
- * identifying the UK function to be called (see uk_func). When the UKK client
- * receives this header and executed the requested UK function, it will use
- * the same header to store the result of the function in the form of a
- * int return code. The size of this structure is such that the
- * first member of the payload following the header can be accessed efficiently
- * on a 32 and 64-bit kernel and the structure has the same size regardless
- * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
- * accordingly in the OS specific mali_uk_os.h header file.
- */
-union uk_header {
-	/**
-	 * 32-bit number identifying the UK function to be called.
-	 * Also see uk_func.
-	 */
-	u32 id;
-	/**
-	 * The int return code returned by the called UK function.
-	 * See the specification of the particular UK function you are
-	 * calling for the meaning of the error codes returned. All
-	 * UK functions return 0 on success.
-	 */
-	u32 ret;
-	/*
-	 * Used to ensure 64-bit alignment of this union. Do not remove.
-	 * This field is used for padding and does not need to be initialized.
-	 */
-	u64 sizer;
-};
-
-/**
- * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
- * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
- */
-struct uku_version_check_args {
-	union uk_header header;
-		  /**< UK call header */
-	u16 major;
-	   /**< This field carries the user-side major version on input and the kernel-side major version on output */
-	u16 minor;
-	   /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
-	u8 padding[4];
-};
-
 /** @} end group uk_api */
 
 /** @} *//* end group base_api */
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
index c5f3ad7..8772edb 100644
--- a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015, 2017-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,56 @@
 #include <mali_kbase.h>
 #include <mali_kbase_defs.h>
 #include <linux/pm_runtime.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/regulator/consumer.h>
 #include "mali_kbase_config_platform.h"
 
+static void enable_gpu_power_control(struct kbase_device *kbdev)
+{
+	unsigned int i;
+
+#if defined(CONFIG_REGULATOR)
+	for (i = 0; i < kbdev->nr_regulators; i++) {
+		if (WARN_ON(kbdev->regulators[i] == NULL))
+			;
+		else if (!regulator_is_enabled(kbdev->regulators[i]))
+			WARN_ON(regulator_enable(kbdev->regulators[i]));
+	}
+#endif
+
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (WARN_ON(kbdev->clocks[i] == NULL))
+			;
+		else if (!__clk_is_enabled(kbdev->clocks[i]))
+			WARN_ON(clk_prepare_enable(kbdev->clocks[i]));
+	}
+}
+
+static void disable_gpu_power_control(struct kbase_device *kbdev)
+{
+	unsigned int i;
+
+	for (i = 0; i < kbdev->nr_clocks; i++) {
+		if (WARN_ON(kbdev->clocks[i] == NULL))
+			;
+		else if (__clk_is_enabled(kbdev->clocks[i])) {
+			clk_disable_unprepare(kbdev->clocks[i]);
+			WARN_ON(__clk_is_enabled(kbdev->clocks[i]));
+		}
+	}
+
+#if defined(CONFIG_REGULATOR)
+	for (i = 0; i < kbdev->nr_regulators; i++) {
+		if (WARN_ON(kbdev->regulators[i] == NULL))
+			;
+		else if (regulator_is_enabled(kbdev->regulators[i]))
+			WARN_ON(regulator_disable(kbdev->regulators[i]));
+	}
+#endif
+}
+
 static int pm_callback_power_on(struct kbase_device *kbdev)
 {
 	int ret = 1; /* Assume GPU has been powered off */
@@ -33,6 +81,8 @@
 	dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
 			(void *)kbdev->dev->pm_domain);
 
+	enable_gpu_power_control(kbdev);
+
 	error = pm_runtime_get_sync(kbdev->dev);
 	if (error == 1) {
 		/*
@@ -53,6 +103,10 @@
 
 	pm_runtime_mark_last_busy(kbdev->dev);
 	pm_runtime_put_autosuspend(kbdev->dev);
+
+#ifndef KBASE_PM_RUNTIME
+	disable_gpu_power_control(kbdev);
+#endif
 }
 
 #ifdef KBASE_PM_RUNTIME
@@ -87,12 +141,15 @@
 {
 	dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
 
+	enable_gpu_power_control(kbdev);
 	return 0;
 }
 
 static void pm_callback_runtime_off(struct kbase_device *kbdev)
 {
 	dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+
+	disable_gpu_power_control(kbdev);
 }
 
 static void pm_callback_resume(struct kbase_device *kbdev)
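
The new helpers above walk every regulator and clock attached to the device; the ordering they encode is the key point: supplies come up before clocks on power-on, and clocks go down before supplies on power-off. A single-supply sketch of the same contract, illustrative only (the example_* names are not part of the driver):

#include <linux/bug.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/regulator/consumer.h>

/* Sketch only: one supply and one clock instead of the kbdev arrays. */
static int example_gpu_power_up(struct regulator *supply, struct clk *clk)
{
	int err;

	if (!regulator_is_enabled(supply)) {
		err = regulator_enable(supply);		/* rail up first */
		if (err)
			return err;
	}

	if (!__clk_is_enabled(clk)) {
		err = clk_prepare_enable(clk);		/* then start the clock */
		if (err)
			return err;			/* caller unwinds the rail */
	}

	return 0;
}

static void example_gpu_power_down(struct regulator *supply, struct clk *clk)
{
	if (__clk_is_enabled(clk))
		clk_disable_unprepare(clk);		/* stop the clock first */

	if (regulator_is_enabled(supply))
		WARN_ON(regulator_disable(supply));	/* then drop the rail */
}
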
diff --git a/drivers/gpu/arm/midgard/tests/Mconfig b/drivers/gpu/arm/midgard/tests/Mconfig
index ddd7630..af4e383 100644
--- a/drivers/gpu/arm/midgard/tests/Mconfig
+++ b/drivers/gpu/arm/midgard/tests/Mconfig
@@ -21,6 +21,11 @@
 	default y if UNIT_TEST_KERNEL_MODULES && MALI_DEVFREQ
 	default n
 
+config BUILD_IPA_UNIT_TESTS
+	bool
+	default y if NO_MALI && BUILD_IPA_TESTS
+	default n
+
 config BUILD_CSF_TESTS
 	bool
 	default y if UNIT_TEST_KERNEL_MODULES && GPU_HAS_CSF
diff --git a/drivers/gpu/arm/midgard/tests/kutf/build.bp b/drivers/gpu/arm/midgard/tests/kutf/build.bp
index 960c8faa..f0c7a0c 100644
--- a/drivers/gpu/arm/midgard/tests/kutf/build.bp
+++ b/drivers/gpu/arm/midgard/tests/kutf/build.bp
@@ -3,7 +3,7 @@
  * ----------------------------------------------------------------------------
  * This confidential and proprietary software may be used only as authorized
  * by a licensing agreement from ARM Limited.
- *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ *      (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED
  * The entire notice above must be reproduced on all authorized copies and
  * copies may only be made to the extent permitted by a licensing agreement
  * from ARM Limited.
@@ -12,7 +12,10 @@
 
 bob_kernel_module {
     name: "kutf",
-    defaults: ["kernel_defaults"],
+    defaults: [
+        "kernel_defaults",
+        "kutf_includes",
+    ],
     srcs: [
         "Kbuild",
         "kutf_helpers.c",
@@ -23,7 +26,6 @@
         "kutf_utils.c",
     ],
     kbuild_options: ["CONFIG_MALI_KUTF=m"],
-    include_dirs: ["kernel/drivers/gpu/arm/midgard/tests/include"],
     enabled: false,
     base_build_kutf: {
         enabled: true,
diff --git a/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
index 1c350bb..3307c0e 100644
--- a/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
+++ b/drivers/gpu/arm/midgard/tests/kutf/kutf_suite.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2014, 2017 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017-2019 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -41,8 +41,6 @@
 #include <kutf/kutf_utils.h>
 #include <kutf/kutf_helpers.h>
 
-#if defined(CONFIG_DEBUG_FS)
-
 /**
  * struct kutf_application - Structure which represents kutf application
  * @name:	The name of this test application.
@@ -242,8 +240,6 @@
 {
 	switch (context->expected_status) {
 	case KUTF_RESULT_UNKNOWN:
-		if (context->status == KUTF_RESULT_UNKNOWN)
-			kutf_test_pass(context, "(implicit pass)");
 		break;
 
 	case KUTF_RESULT_WARN:
@@ -1141,6 +1137,8 @@
 }
 EXPORT_SYMBOL(kutf_test_abort);
 
+#ifdef CONFIG_DEBUG_FS
+
 /**
  * init_kutf_core() - Module entry point.
  *
@@ -1175,7 +1173,7 @@
 		destroy_workqueue(kutf_workq);
 }
 
-#else	/* defined(CONFIG_DEBUG_FS) */
+#else	/* CONFIG_DEBUG_FS */
 
 /**
  * init_kutf_core() - Module entry point.
@@ -1197,7 +1195,7 @@
 static void __exit exit_kutf_core(void)
 {
 }
-#endif	/* defined(CONFIG_DEBUG_FS) */
+#endif	/* CONFIG_DEBUG_FS */
 
 MODULE_LICENSE("GPL");
 
diff --git a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
index a6669af..971f092 100644
--- a/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
+++ b/drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/build.bp
@@ -3,7 +3,7 @@
  * ----------------------------------------------------------------------------
  * This confidential and proprietary software may be used only as authorized
  * by a licensing agreement from ARM Limited.
- *      (C) COPYRIGHT 2018 ARM Limited, ALL RIGHTS RESERVED
+ *      (C) COPYRIGHT 2018-2019 ARM Limited, ALL RIGHTS RESERVED
  * The entire notice above must be reproduced on all authorized copies and
  * copies may only be made to the extent permitted by a licensing agreement
  * from ARM Limited.
@@ -12,7 +12,10 @@
 
 bob_kernel_module {
     name: "mali_kutf_irq_test",
-    defaults: ["kernel_test_module_defaults"],
+    defaults: [
+        "mali_kbase_shared_config_defaults",
+        "kernel_test_includes",
+    ],
     srcs: [
         "Kbuild",
         "mali_kutf_irq_test_main.c",
@@ -21,7 +24,6 @@
         "mali_kbase",
         "kutf",
     ],
-    install_group: "IG_tests",
     enabled: false,
     base_build_kutf: {
         enabled: true,
diff --git a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
index 9cb0465..f266d8e 100644
--- a/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
+++ b/drivers/gpu/arm/midgard/thirdparty/mali_kbase_mmap.c
@@ -35,7 +35,7 @@
 /* mali_kbase_mmap.c
  *
  * This file contains Linux specific implementation of
- * kbase_get_unmapped_area() interface.
+ * kbase_context_get_unmapped_area() interface.
  */
 
 
@@ -253,11 +253,10 @@
  * simplified slightly. Modifications come from the fact that some values
  * about the memory area are known in advance.
  */
-unsigned long kbase_get_unmapped_area(struct file *filp,
+unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 		const unsigned long addr, const unsigned long len,
 		const unsigned long pgoff, const unsigned long flags)
 {
-	struct kbase_context *kctx = filp->private_data;
 	struct mm_struct *mm = current->mm;
 	struct vm_unmapped_area_info info;
 	unsigned long align_offset = 0;
@@ -337,8 +336,8 @@
 			kbase_gpu_vm_unlock(kctx);
 #ifndef CONFIG_64BIT
 	} else {
-		return current->mm->get_unmapped_area(filp, addr, len, pgoff,
-						      flags);
+		return current->mm->get_unmapped_area(
+			kctx->filp, addr, len, pgoff, flags);
 #endif
 	}
 
diff --git a/include/linux/memory_group_manager.h b/include/linux/memory_group_manager.h
new file mode 100644
index 0000000..b1ac253
--- /dev/null
+++ b/include/linux/memory_group_manager.h
@@ -0,0 +1,198 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _MEMORY_GROUP_MANAGER_H_
+#define _MEMORY_GROUP_MANAGER_H_
+
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/version.h>
+
+#if (KERNEL_VERSION(4, 17, 0) > LINUX_VERSION_CODE)
+typedef int vm_fault_t;
+#endif
+
+#define MEMORY_GROUP_MANAGER_NR_GROUPS (16)
+
+struct memory_group_manager_device;
+struct memory_group_manager_import_data;
+
+/**
+ * struct memory_group_manager_ops - Callbacks for memory group manager
+ *                                   operations
+ *
+ * @mgm_alloc_page:           Callback to allocate physical memory in a group
+ * @mgm_free_page:            Callback to free physical memory in a group
+ * @mgm_get_import_memory_id: Callback to get the group ID for imported memory
+ * @mgm_update_gpu_pte:       Callback to modify a GPU page table entry
+ * @mgm_vmf_insert_pfn_prot:  Callback to map a physical memory page for the CPU
+ */
+struct memory_group_manager_ops {
+	/**
+	 * mgm_alloc_page - Allocate a physical memory page in a group
+	 *
+	 * @mgm_dev:  The memory group manager through which the request is
+	 *            being made.
+	 * @group_id: A physical memory group ID. The meaning of this is defined
+	 *            by the systems integrator. Its valid range is
+	 *            0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @gfp_mask: Bitmask of Get Free Page flags affecting allocator
+	 *            behavior.
+	 * @order:    Page order for physical page size (order=0 means 4 KiB,
+	 *            order=9 means 2 MiB).
+	 *
+	 * Return: Pointer to allocated page, or NULL if allocation failed.
+	 */
+	struct page *(*mgm_alloc_page)(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		gfp_t gfp_mask, unsigned int order);
+
+	/**
+	 * mgm_free_page - Free a physical memory page in a group
+	 *
+	 * @mgm_dev:  The memory group manager through which the request
+	 *            is being made.
+	 * @group_id: A physical memory group ID. The meaning of this is
+	 *            defined by the systems integrator. Its valid range is
+	 *            0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @page:     Address of the struct page associated with a page of
+	 *            physical memory that was allocated by calling the
+	 *            mgm_alloc_page method of the same memory pool with the
+	 *            same values of @group_id and @order.
+	 * @order:    Page order for physical page size (order=0 means 4 KiB,
+	 *            order=9 means 2 MiB).
+	 */
+	void (*mgm_free_page)(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		struct page *page, unsigned int order);
+
+	/**
+	 * mgm_get_import_memory_id - Get the physical memory group ID for the
+	 *                            imported memory
+	 *
+	 * @mgm_dev:     The memory group manager through which the request
+	 *               is being made.
+	 * @import_data: Pointer to the data which describes imported memory.
+	 *
+	 * Note that provision of this callback is optional: if it is not
+	 * provided, this callback pointer must be set to NULL to indicate
+	 * that it is not in use.
+	 *
+	 * Return: The memory group ID to use when mapping pages from this
+	 *         imported memory.
+	 */
+	int (*mgm_get_import_memory_id)(
+		struct memory_group_manager_device *mgm_dev,
+		struct memory_group_manager_import_data *import_data);
+
+	/**
+	 * mgm_update_gpu_pte - Modify a GPU page table entry for a memory group
+	 *
+	 * @mgm_dev:   The memory group manager through which the request
+	 *             is being made.
+	 * @group_id:  A physical memory group ID. The meaning of this is
+	 *             defined by the systems integrator. Its valid range is
+	 *             0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @mmu_level: The level of the page table entry in @pte.
+	 * @pte:       The page table entry to modify, in LPAE or AArch64 format
+	 *             (depending on the driver's configuration). This should be
+	 *             decoded to determine the physical address and any other
+	 *             properties of the mapping the manager requires.
+	 *
+	 * This function allows the memory group manager to modify a GPU page
+	 * table entry before it is stored by the kbase module (controller
+	 * driver). It may set certain bits in the page table entry attributes
+	 * or in the physical address, based on the physical memory group ID.
+	 *
+	 * Return: A modified GPU page table entry to be stored in a page table.
+	 */
+	u64 (*mgm_update_gpu_pte)(struct memory_group_manager_device *mgm_dev,
+			int group_id, int mmu_level, u64 pte);
+
+	/**
+	 * mgm_vmf_insert_pfn_prot - Map a physical page in a group for the CPU
+	 *
+	 * @mgm_dev:   The memory group manager through which the request
+	 *             is being made.
+	 * @group_id:  A physical memory group ID. The meaning of this is
+	 *             defined by the systems integrator. Its valid range is
+	 *             0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
+	 * @vma:       The virtual memory area to insert the page into.
+	 * @addr:      A virtual address (in @vma) to assign to the page.
+	 * @pfn:       The kernel Page Frame Number to insert at @addr in @vma.
+	 * @pgprot:    Protection flags for the inserted page.
+	 *
+	 * Called from a CPU virtual memory page fault handler. This function
+	 * creates a page table entry from the given parameter values and stores
+	 * it at the appropriate location (unlike mgm_update_gpu_pte, which
+	 * returns a modified entry).
+	 *
+	 * Return: Type of fault that occurred or VM_FAULT_NOPAGE if the page
+	 *         table entry was successfully installed.
+	 */
+	vm_fault_t (*mgm_vmf_insert_pfn_prot)(
+		struct memory_group_manager_device *mgm_dev, int group_id,
+		struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, pgprot_t pgprot);
+};
+
+/**
+ * struct memory_group_manager_device - Device structure for a memory group
+ *                                      manager
+ *
+ * @ops:   Callbacks associated with this device
+ * @data:  Pointer to device private data
+ * @owner: Pointer to the module that owns this device
+ *
+ * A systems integrator who wants to provide custom behavior for memory
+ * operations performed by the kbase module (controller driver) must provide
+ * a platform-specific driver module which implements this interface.
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct memory_group_manager_device {
+	struct memory_group_manager_ops ops;
+	void *data;
+	struct module *owner;
+};
+
+enum memory_group_manager_import_type {
+	MEMORY_GROUP_MANAGER_IMPORT_TYPE_DMA_BUF
+};
+
+/**
+ * struct memory_group_manager_import_data - Structure describing the imported
+ *                                           memory
+ *
+ * @type: Type of the imported memory
+ * @u:    Union describing the imported memory
+ *
+ */
+struct memory_group_manager_import_data {
+	enum memory_group_manager_import_type type;
+	union {
+		struct dma_buf *dma_buf;
+	} u;
+};
+
+#endif /* _MEMORY_GROUP_MANAGER_H_ */
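To illustrate how an integrator might implement this interface, here is a hedged, minimal sketch of a pass-through memory group manager that ignores the group ID and defers to the normal page allocator (roughly the role of the in-tree mali_kbase_native_mgm.c added earlier in this series). All example_* names are illustrative, not taken from the patch, and the sketch assumes a kernel new enough (4.17+) to provide vmf_insert_pfn_prot() returning vm_fault_t.

#include <linux/memory_group_manager.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/module.h>

/* Pass-through callbacks: every group maps to the normal allocator. */

static struct page *example_mgm_alloc_page(
	struct memory_group_manager_device *mgm_dev, int group_id,
	gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages(gfp_mask, order);
}

static void example_mgm_free_page(
	struct memory_group_manager_device *mgm_dev, int group_id,
	struct page *page, unsigned int order)
{
	__free_pages(page, order);
}

static u64 example_mgm_update_gpu_pte(
	struct memory_group_manager_device *mgm_dev, int group_id,
	int mmu_level, u64 pte)
{
	/* No group-specific bits: store the entry unmodified. */
	return pte;
}

static vm_fault_t example_mgm_vmf_insert_pfn_prot(
	struct memory_group_manager_device *mgm_dev, int group_id,
	struct vm_area_struct *vma, unsigned long addr,
	unsigned long pfn, pgprot_t pgprot)
{
	return vmf_insert_pfn_prot(vma, addr, pfn, pgprot);
}

static struct memory_group_manager_device example_mgm_dev = {
	.ops = {
		.mgm_alloc_page = example_mgm_alloc_page,
		.mgm_free_page = example_mgm_free_page,
		/* Optional callback: NULL means "not in use". */
		.mgm_get_import_memory_id = NULL,
		.mgm_update_gpu_pte = example_mgm_update_gpu_pte,
		.mgm_vmf_insert_pfn_prot = example_mgm_vmf_insert_pfn_prot,
	},
	.data = NULL,
	.owner = THIS_MODULE,
};

As the struct documentation above notes, a real integrator module would register such a device with its platform device via platform_set_drvdata() so that kbase can look it up; group-aware allocation and PTE policies would then replace the pass-through bodies.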