| /*************************************************************************/ /*! |
| @File cache_km.c |
| @Title CPU d-cache maintenance operations framework |
| @Copyright Copyright (c) Imagination Technologies Ltd. All Rights Reserved |
| @Description Implements server side code for CPU d-cache maintenance taking |
| into account the idiosyncrasies of the various types of CPU |
| d-cache instruction-set architecture (ISA) maintenance |
| mechanisms. |
| @License Dual MIT/GPLv2 |
| |
| The contents of this file are subject to the MIT license as set out below. |
| |
| Permission is hereby granted, free of charge, to any person obtaining a copy |
| of this software and associated documentation files (the "Software"), to deal |
| in the Software without restriction, including without limitation the rights |
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| copies of the Software, and to permit persons to whom the Software is |
| furnished to do so, subject to the following conditions: |
| |
| The above copyright notice and this permission notice shall be included in |
| all copies or substantial portions of the Software. |
| |
| Alternatively, the contents of this file may be used under the terms of |
| the GNU General Public License Version 2 ("GPL") in which case the provisions |
| of GPL are applicable instead of those above. |
| |
| If you wish to allow use of your version of this file only under the terms of |
| GPL, and not to allow others to use your version of this file under the terms |
| of the MIT license, indicate your decision by deleting the provisions above |
| and replace them with the notice and other provisions required by GPL as set |
| out in the file called "GPL-COPYING" included in this distribution. If you do |
| not delete the provisions above, a recipient may use your version of this file |
| under the terms of either the MIT license or GPL. |
| |
| This License is also included in this distribution in the file called |
| "MIT-COPYING". |
| |
| EXCEPT AS OTHERWISE STATED IN A NEGOTIATED AGREEMENT: (A) THE SOFTWARE IS |
| PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING |
| BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
| PURPOSE AND NONINFRINGEMENT; AND (B) IN NO EVENT SHALL THE AUTHORS OR |
| COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
| IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ /**************************************************************************/ |
| #if defined(LINUX) |
| #include <linux/version.h> |
| #include <linux/uaccess.h> |
| #include <asm/current.h> |
| #include <linux/sched.h> |
| #include <linux/mm.h> |
| #include <linux/highmem.h> |
| #endif |
| |
| #include "pmr.h" |
| #include "log2.h" |
| #include "device.h" |
| #include "pvrsrv.h" |
| #include "osfunc.h" |
| #include "cache_km.h" |
| #include "pvr_debug.h" |
| #include "lock_types.h" |
| #include "allocmem.h" |
| #include "process_stats.h" |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| #include "ri_server.h" |
| #endif |
| #include "devicemem.h" |
| #include "pvrsrv_apphint.h" |
| #include "pvrsrv_sync_server.h" |
| #include "km_apphint_defs.h" |
| |
| /* This header must always be included last */ |
| #if defined(LINUX) |
| #include "kernel_compatibility.h" |
| #endif |
| |
| /* Top-level file-local build definitions */ |
| #if defined(PVRSRV_ENABLE_CACHEOP_STATS) && defined(LINUX) |
| #define CACHEOP_DEBUG |
| #define CACHEOP_STATS_ITEMS_MAX 32 |
| #define INCR_WRAP(x) (((x)+1) >= CACHEOP_STATS_ITEMS_MAX ? 0 : ((x)+1)) |
| #define DECR_WRAP(x) (((x)-1) < 0 ? (CACHEOP_STATS_ITEMS_MAX-1) : ((x)-1)) |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| /* Refer to CacheOpStatsExecLogHeader() for header item names */ |
| #define CACHEOP_RI_PRINTF_HEADER "%-8s %-10s %-10s %-5s %-16s %-16s %-10s %-10s %-18s %-18s %-12s" |
| #define CACHEOP_RI_PRINTF "%-8d %-10s %-10s %-5s 0x%-14llx 0x%-14llx 0x%-8llx 0x%-8llx %-18llu %-18llu 0x%-10x\n" |
| #else |
| #define CACHEOP_PRINTF_HEADER "%-8s %-10s %-10s %-5s %-10s %-10s %-18s %-18s %-12s" |
| #define CACHEOP_PRINTF "%-8d %-10s %-10s %-5s 0x%-8llx 0x%-8llx %-18llu %-18llu 0x%-10x\n" |
| #endif |
| #endif |
| |
| //#define CACHEOP_NO_CACHE_LINE_ALIGNED_ROUNDING /* Force OS page (not cache line) flush granularity */ |
| #define CACHEOP_PVR_ASSERT(x) /* Define as PVR_ASSERT(x), enable for swdev & testing */ |
| #if defined(PVRSRV_SERVER_THREADS_INDEFINITE_SLEEP) |
| #define CACHEOP_THREAD_WAIT_TIMEOUT 0ULL /* Wait indefinitely */ |
| #else |
| #define CACHEOP_THREAD_WAIT_TIMEOUT 500000ULL /* Wait 500ms between waits unless woken up on demand */ |
| #endif |
| #define CACHEOP_FENCE_WAIT_TIMEOUT 1000ULL /* Wait 1ms between wait events unless woken up */ |
| #define CACHEOP_FENCE_RETRY_ABORT 1000ULL /* Number of fence retries after which the fence operation is aborted */ |
| #define CACHEOP_SEQ_MIDPOINT (IMG_UINT32) 0x7FFFFFFF /* Midpoint at which seqNum(s) are rebased and compared */ |
| #define CACHEOP_ABORT_FENCE_ERROR_STRING "detected stalled client, retrying cacheop fence" |
| #define CACHEOP_NO_GFLUSH_ERROR_STRING "global flush requested on CPU without support" |
| #define CACHEOP_DEVMEM_OOR_ERROR_STRING "cacheop device memory request is out of range" |
| #define CACHEOP_MAX_DEBUG_MESSAGE_LEN 160 |
| |
| typedef struct _CACHEOP_WORK_ITEM_ |
| { |
| PMR *psPMR; |
| IMG_UINT32 ui32GFSeqNum; |
| IMG_UINT32 ui32OpSeqNum; |
| IMG_DEVMEM_SIZE_T uiSize; |
| PVRSRV_CACHE_OP uiCacheOp; |
| IMG_DEVMEM_OFFSET_T uiOffset; |
| PVRSRV_TIMELINE iTimeline; |
| SYNC_TIMELINE_OBJ sSWTimelineObj; |
| PVRSRV_DEVICE_NODE *psDevNode; |
| #if defined(CACHEOP_DEBUG) |
| IMG_UINT64 ui64EnqueuedTime; |
| IMG_UINT64 ui64DequeuedTime; |
| IMG_UINT64 ui64ExecuteTime; |
| IMG_BOOL bDeferred; |
| IMG_BOOL bKMReq; |
| IMG_BOOL bRBF; |
| IMG_BOOL bUMF; |
| IMG_PID pid; |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| RGXFWIF_DM eFenceOpType; |
| #endif |
| #endif |
| } CACHEOP_WORK_ITEM; |
| |
| typedef struct _CACHEOP_STATS_EXEC_ITEM_ |
| { |
| IMG_PID pid; |
| IMG_UINT32 ui32OpSeqNum; |
| PVRSRV_CACHE_OP uiCacheOp; |
| IMG_DEVMEM_SIZE_T uiOffset; |
| IMG_DEVMEM_SIZE_T uiSize; |
| IMG_UINT64 ui64EnqueuedTime; |
| IMG_UINT64 ui64DequeuedTime; |
| IMG_UINT64 ui64ExecuteTime; |
| IMG_BOOL bIsFence; |
| IMG_BOOL bKMReq; |
| IMG_BOOL bRBF; |
| IMG_BOOL bUMF; |
| IMG_BOOL bDeferred; |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| IMG_DEV_VIRTADDR sDevVAddr; |
| IMG_DEV_PHYADDR sDevPAddr; |
| RGXFWIF_DM eFenceOpType; |
| #endif |
| } CACHEOP_STATS_EXEC_ITEM; |
| |
| typedef enum _CACHEOP_CONFIG_ |
| { |
| CACHEOP_CONFIG_DEFAULT = 0, |
| /* cache flush mechanism types */ |
| CACHEOP_CONFIG_KRBF = 1, |
| CACHEOP_CONFIG_KGF = 2, |
| CACHEOP_CONFIG_URBF = 4, |
| /* sw-emulated deferred flush mechanism */ |
| CACHEOP_CONFIG_KDF = 8, |
| /* pseudo configuration items */ |
| CACHEOP_CONFIG_LAST = 16, |
| CACHEOP_CONFIG_KLOG = 16, |
| CACHEOP_CONFIG_ALL = 31 |
| } CACHEOP_CONFIG; |
| |
| typedef struct _CACHEOP_WORK_QUEUE_ |
| { |
| /* |
| * Init. state & primary device node framework |
| * is anchored on. |
| */ |
| IMG_BOOL bInit; |
| /* |
| MMU page size/shift & d-cache line size |
| */ |
| size_t uiPageSize; |
| IMG_UINT32 uiLineSize; |
| IMG_UINT32 uiLineShift; |
| IMG_UINT32 uiPageShift; |
| PVRSRV_CACHE_OP_ADDR_TYPE uiCacheOpAddrType; |
| /* |
| CacheOp deferred queueing protocol |
| + Implementation geared for performance, atomic counter based |
| - Value Space is 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> n. |
| - Index Space is 0 -> 1 -> 2 -> 3 -> 0 -> 1 -> 2 -> 3 -> 0 -> m (4-entry queue shown for illustration). |
| - Index = Value modulo CACHEOP_INDICES_MAX. |
| + Write counter never collides with read counter in index space |
| - Unless at start of day when both are initialised to zero. |
| - This means we sacrifice one entry when the queue is full. |
| - Incremented by producer |
| - Value space tracks total number of CacheOps queued. |
| - Index space identifies CacheOp CCB queue index. |
| + Read counter increments towards write counter in value space |
| - Empty queue occurs when read equals write counter. |
| - Wrap-round logic handled by consumer as/when needed. |
| - Incremented by consumer |
| - Value space tracks total # of CacheOps executed. |
| - Index space identifies CacheOp CCB queue index. |
| + Total queued size adjusted up/down during write/read activity |
| - Counter might overflow but does not compromise framework. |
| */ |
| ATOMIC_T hReadCounter; |
| ATOMIC_T hWriteCounter; |
| /* |
| CacheOp sequence numbers |
| + hCommonSeqNum: |
| - Common sequence, numbers every CacheOp operation in both UM/KM. |
| - In KM |
| - Every deferred CacheOp (on behalf of UM) gets a unique seqNum. |
| - Last executed deferred CacheOp updates gsCwq.hCompletedSeqNum. |
| - Every GF operation (if supported) also gets a unique seqNum. |
| - Last executed GF operation updates CACHEOP_INFO_GFSEQNUM0. |
| - Under debug, all CacheOp gets a unique seqNum for tracking. |
| - This includes all UM/KM synchronous non-deferred CacheOp(s) |
| - In UM |
| - If the processor architecture supports GF maintenance (in KM) |
| - All UM CacheOp samples CACHEOP_INFO_GFSEQNUM0 via info. page. |
| - CacheOp(s) discarded if another GF occurs before execution. |
| - CacheOp(s) discarding happens in both UM and KM space. |
| + hCompletedSeqNum: |
| - Tracks last executed KM/deferred RBF/Global<timeline> CacheOp(s) |
| + hDeferredSize: |
| - Running total of size of currently deferred CacheOp in queue. |
| */ |
| ATOMIC_T hDeferredSize; |
| ATOMIC_T hCommonSeqNum; |
| ATOMIC_T hCompletedSeqNum; |
| /* |
| CacheOp information page |
| + psInfoPagePMR: |
| - Single system-wide OS page that is multi-mapped in UM/KM. |
| - Mapped into clients using read-only memory protection. |
| - Mapped into server using read/write memory protection. |
| - Contains information pertaining to cache framework. |
| + pui32InfoPage: |
| - Server linear address pointer to said information page. |
| - Each info-page entry currently of sizeof(IMG_UINT32). |
| */ |
| PMR *psInfoPagePMR; |
| IMG_UINT32 *pui32InfoPage; |
| /* |
| CacheOp deferred work-item queue |
| + CACHEOP_INDICES_LOG2_SIZE |
| - Sized using GF/RBF ratio |
| */ |
| #define CACHEOP_INDICES_LOG2_SIZE (4) |
| #define CACHEOP_INDICES_MAX (1 << CACHEOP_INDICES_LOG2_SIZE) |
| #define CACHEOP_INDICES_MASK (CACHEOP_INDICES_MAX-1) |
| CACHEOP_WORK_ITEM asWorkItems[CACHEOP_INDICES_MAX]; |
| #if defined(CACHEOP_DEBUG) |
| /* |
| CacheOp statistics |
| */ |
| void *pvStatsEntry; |
| IMG_HANDLE hStatsExecLock; |
| IMG_UINT32 ui32ServerASync; |
| IMG_UINT32 ui32ServerSyncVA; |
| IMG_UINT32 ui32ServerSync; |
| IMG_UINT32 ui32ServerRBF; |
| IMG_UINT32 ui32ServerGF; |
| IMG_UINT32 ui32ServerDGF; |
| IMG_UINT32 ui32ServerDTL; |
| IMG_UINT32 ui32ClientSync; |
| IMG_UINT32 ui32ClientRBF; |
| IMG_UINT32 ui32KMDiscards; |
| IMG_UINT32 ui32UMDiscards; |
| IMG_UINT32 ui32TotalFenceOps; |
| IMG_UINT32 ui32TotalExecOps; |
| IMG_UINT32 ui32AvgExecTime; |
| IMG_UINT32 ui32AvgFenceTime; |
| IMG_INT32 i32StatsExecWriteIdx; |
| CACHEOP_STATS_EXEC_ITEM asStatsExecuted[CACHEOP_STATS_ITEMS_MAX]; |
| #endif |
| /* |
| CacheOp (re)configuration |
| */ |
| void *pvConfigTune; |
| IMG_HANDLE hConfigLock; |
| /* |
| CacheOp deferred worker thread |
| + eConfig |
| - Runtime configuration |
| + hWorkerThread |
| - CacheOp thread handler |
| + hThreadWakeUpEvtObj |
| - Event object to drive CacheOp worker thread sleep/wake-ups. |
| + hClientWakeUpEvtObj |
| - Event object to unblock stalled clients waiting on queue. |
| */ |
| CACHEOP_CONFIG eConfig; |
| IMG_UINT32 ui32Config; |
| IMG_HANDLE hWorkerThread; |
| IMG_HANDLE hDeferredLock; |
| IMG_HANDLE hGlobalFlushLock; |
| IMG_HANDLE hThreadWakeUpEvtObj; |
| IMG_HANDLE hClientWakeUpEvtObj; |
| IMG_UINT32 ui32FenceWaitTimeUs; |
| IMG_UINT32 ui32FenceRetryAbort; |
| IMG_BOOL bNoGlobalFlushImpl; |
| IMG_BOOL bSupportsUMFlush; |
| } CACHEOP_WORK_QUEUE; |
| |
| /* Top-level CacheOp framework object */ |
| static CACHEOP_WORK_QUEUE gsCwq; |
| |
| #define CacheOpConfigSupports(e) ((gsCwq.eConfig & (e)) ? IMG_TRUE : IMG_FALSE) |
| |
| static INLINE IMG_UINT32 CacheOpIdxRead(ATOMIC_T *phCounter) |
| { |
| IMG_UINT32 ui32Idx = OSAtomicRead(phCounter); |
| return ui32Idx & CACHEOP_INDICES_MASK; |
| } |
| |
| static INLINE IMG_UINT32 CacheOpIdxIncrement(ATOMIC_T *phCounter) |
| { |
| IMG_UINT32 ui32Idx = OSAtomicIncrement(phCounter); |
| return ui32Idx & CACHEOP_INDICES_MASK; |
| } |
| |
| static INLINE IMG_UINT32 CacheOpIdxNext(ATOMIC_T *phCounter) |
| { |
| IMG_UINT32 ui32Idx = OSAtomicRead(phCounter); |
| return ++ui32Idx & CACHEOP_INDICES_MASK; |
| } |
| |
| static INLINE IMG_UINT32 CacheOpIdxSpan(ATOMIC_T *phLhs, ATOMIC_T *phRhs) |
| { |
| return OSAtomicRead(phLhs) - OSAtomicRead(phRhs); |
| } |
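| |
| /* |
| The sketch below is purely illustrative (it is not the actual enqueue/dequeue |
| code elsewhere in this file); it shows how the index helpers above are assumed |
| to realise the producer/consumer protocol described for asWorkItems[], with |
| wrap-around handled by masking the free-running counters: |
| |
| // Producer: reserve a slot only while the queue is not full; one entry |
| // is sacrificed so a full queue is never mistaken for an empty one. |
| if (CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter) < CACHEOP_INDICES_MAX - 1) |
| { |
| IMG_UINT32 ui32Idx = CacheOpIdxNext(&gsCwq.hWriteCounter); // peek next slot |
| // ... populate gsCwq.asWorkItems[ui32Idx] ... |
| (void) CacheOpIdxIncrement(&gsCwq.hWriteCounter); // publish it |
| } |
| |
| // Consumer: drain until the read counter catches up with the write counter. |
| while (CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter)) |
| { |
| IMG_UINT32 ui32Idx = CacheOpIdxNext(&gsCwq.hReadCounter); |
| // ... execute gsCwq.asWorkItems[ui32Idx] ... |
| (void) CacheOpIdxIncrement(&gsCwq.hReadCounter); |
| } |
| */ |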
| |
| static INLINE IMG_UINT64 DivBy10(IMG_UINT64 uiNum) |
| { |
| IMG_UINT64 uiQuot; |
| IMG_UINT64 uiRem; |
| |
| uiQuot = (uiNum >> 1) + (uiNum >> 2); |
| uiQuot = uiQuot + (uiQuot >> 4); |
| uiQuot = uiQuot + (uiQuot >> 8); |
| uiQuot = uiQuot + (uiQuot >> 16); |
| uiQuot = uiQuot >> 3; |
| uiRem = uiNum - (((uiQuot << 2) + uiQuot) << 1); |
| |
| return uiQuot + (uiRem > 9); |
| } |
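| |
| /* |
| Illustrative note on DivBy10(): the shift/add chain above approximates |
| uiNum * (8/10) without a divide instruction, the final ">> 3" then yields |
| roughly uiNum/10, and the remainder test rounds the result up when the true |
| remainder exceeds 9. Worked example: DivBy10(1000) computes uiQuot = 99 and |
| uiRem = 10, returning 100. Three chained calls are used later in this file |
| to convert nanoseconds to approximate microseconds. |
| */ |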
| |
| /* Callback to dump info of cacheop thread in debug_dump */ |
| static void CacheOpThreadDumpInfo(DUMPDEBUG_PRINTF_FUNC* pfnDumpDebugPrintf, |
| void *pvDumpDebugFile) |
| { |
| PVR_DUMPDEBUG_LOG(" Configuration: QSZ: %d, UKT: %d, KDFT: %d, " |
| "KGFT: %d, LINESIZE: %d, PGSIZE: %d, KDF: %s, " |
| "URBF: %s, KGF: %s, KRBF: %s", |
| CACHEOP_INDICES_MAX, |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD], |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD], |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD], |
| gsCwq.pui32InfoPage[CACHEOP_INFO_LINESIZE], |
| gsCwq.pui32InfoPage[CACHEOP_INFO_PGSIZE], |
| gsCwq.eConfig & CACHEOP_CONFIG_KDF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_URBF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_KGF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_KRBF ? "Yes" : "No" |
| ); |
| PVR_DUMPDEBUG_LOG(" Pending deferred CacheOp entries : %u", |
| CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter)); |
| } |
| |
| #if defined(CACHEOP_DEBUG) |
| static INLINE void CacheOpStatsExecLogHeader(IMG_CHAR szBuffer[CACHEOP_MAX_DEBUG_MESSAGE_LEN]) |
| { |
| OSSNPrintf(szBuffer, CACHEOP_MAX_DEBUG_MESSAGE_LEN, |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| CACHEOP_RI_PRINTF_HEADER, |
| #else |
| CACHEOP_PRINTF_HEADER, |
| #endif |
| "Pid", |
| "CacheOp", |
| " Type", |
| "Mode", |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| "DevVAddr", |
| "DevPAddr", |
| #endif |
| "Offset", |
| "Size", |
| "xTime (us)", |
| "qTime (us)", |
| "SeqNum"); |
| } |
| |
| static void CacheOpStatsExecLogWrite(CACHEOP_WORK_ITEM *psCacheOpWorkItem) |
| { |
| IMG_UINT64 ui64ExecuteTime; |
| IMG_UINT64 ui64EnqueuedTime; |
| IMG_INT32 i32WriteOffset; |
| |
| if (!psCacheOpWorkItem->ui32OpSeqNum && !psCacheOpWorkItem->uiCacheOp) |
| { |
| /* This breaks the logic of read-out, so we do not queue items |
| with zero sequence number and no CacheOp */ |
| return; |
| } |
| else if (psCacheOpWorkItem->bKMReq && !CacheOpConfigSupports(CACHEOP_CONFIG_KLOG)) |
| { |
| /* KM requests spam the history due to their frequency, so they are dropped completely unless KLOG is enabled */ |
| return; |
| } |
| |
| OSLockAcquire(gsCwq.hStatsExecLock); |
| |
| i32WriteOffset = gsCwq.i32StatsExecWriteIdx; |
| gsCwq.asStatsExecuted[i32WriteOffset].pid = psCacheOpWorkItem->pid; |
| gsCwq.i32StatsExecWriteIdx = INCR_WRAP(gsCwq.i32StatsExecWriteIdx); |
| gsCwq.asStatsExecuted[i32WriteOffset].bRBF = psCacheOpWorkItem->bRBF; |
| gsCwq.asStatsExecuted[i32WriteOffset].bUMF = psCacheOpWorkItem->bUMF; |
| gsCwq.asStatsExecuted[i32WriteOffset].uiSize = psCacheOpWorkItem->uiSize; |
| gsCwq.asStatsExecuted[i32WriteOffset].bKMReq = psCacheOpWorkItem->bKMReq; |
| gsCwq.asStatsExecuted[i32WriteOffset].uiOffset = psCacheOpWorkItem->uiOffset; |
| gsCwq.asStatsExecuted[i32WriteOffset].uiCacheOp = psCacheOpWorkItem->uiCacheOp; |
| gsCwq.asStatsExecuted[i32WriteOffset].bDeferred = psCacheOpWorkItem->bDeferred; |
| gsCwq.asStatsExecuted[i32WriteOffset].ui32OpSeqNum = psCacheOpWorkItem->ui32OpSeqNum; |
| gsCwq.asStatsExecuted[i32WriteOffset].ui64ExecuteTime = psCacheOpWorkItem->ui64ExecuteTime; |
| gsCwq.asStatsExecuted[i32WriteOffset].ui64EnqueuedTime = psCacheOpWorkItem->ui64EnqueuedTime; |
| gsCwq.asStatsExecuted[i32WriteOffset].ui64DequeuedTime = psCacheOpWorkItem->ui64DequeuedTime; |
| /* During early system initialisation, only non-fence & non-PMR CacheOps are processed */ |
| gsCwq.asStatsExecuted[i32WriteOffset].bIsFence = gsCwq.bInit && !psCacheOpWorkItem->psPMR; |
| CACHEOP_PVR_ASSERT(gsCwq.asStatsExecuted[i32WriteOffset].pid); |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| if (gsCwq.bInit && psCacheOpWorkItem->psPMR) |
| { |
| IMG_CPU_PHYADDR sDevPAddr; |
| PVRSRV_ERROR eError; |
| IMG_BOOL bValid; |
| |
| /* Get more detailed information regarding the sub allocations that |
| PMR has from RI manager for process that requested the CacheOp */ |
| eError = RIDumpProcessListKM(psCacheOpWorkItem->psPMR, |
| gsCwq.asStatsExecuted[i32WriteOffset].pid, |
| gsCwq.asStatsExecuted[i32WriteOffset].uiOffset, |
| &gsCwq.asStatsExecuted[i32WriteOffset].sDevVAddr); |
| if (eError != PVRSRV_OK) |
| { |
| goto e0; |
| } |
| |
| /* (Re)lock here as some PMRs might not have been locked */ |
| eError = PMRLockSysPhysAddresses(psCacheOpWorkItem->psPMR); |
| if (eError != PVRSRV_OK) |
| { |
| goto e0; |
| } |
| |
| eError = PMR_CpuPhysAddr(psCacheOpWorkItem->psPMR, |
| gsCwq.uiPageShift, |
| 1, |
| gsCwq.asStatsExecuted[i32WriteOffset].uiOffset, |
| &sDevPAddr, |
| &bValid); |
| if (eError != PVRSRV_OK) |
| { |
| eError = PMRUnlockSysPhysAddresses(psCacheOpWorkItem->psPMR); |
| PVR_LOG_IF_ERROR(eError, "PMRUnlockSysPhysAddresses"); |
| goto e0; |
| } |
| |
| eError = PMRUnlockSysPhysAddresses(psCacheOpWorkItem->psPMR); |
| PVR_LOG_IF_ERROR(eError, "PMRUnlockSysPhysAddresses"); |
| |
| gsCwq.asStatsExecuted[i32WriteOffset].sDevPAddr.uiAddr = sDevPAddr.uiAddr; |
| } |
| |
| if (gsCwq.asStatsExecuted[i32WriteOffset].bIsFence) |
| { |
| gsCwq.asStatsExecuted[i32WriteOffset].eFenceOpType = psCacheOpWorkItem->eFenceOpType; |
| } |
| #endif |
| |
| /* Convert timing from nano-seconds to micro-seconds */ |
| ui64ExecuteTime = gsCwq.asStatsExecuted[i32WriteOffset].ui64ExecuteTime; |
| ui64EnqueuedTime = gsCwq.asStatsExecuted[i32WriteOffset].ui64EnqueuedTime; |
| ui64ExecuteTime = DivBy10(DivBy10(DivBy10(ui64ExecuteTime))); |
| ui64EnqueuedTime = DivBy10(DivBy10(DivBy10(ui64EnqueuedTime))); |
| |
| /* Coalesced (to global) deferred CacheOps do not contribute to statistics, |
| as the enqueue/execute times are identical for these CacheOps */ |
| if (!gsCwq.asStatsExecuted[i32WriteOffset].bIsFence) |
| { |
| /* Calculate the rolling approximate average execution time */ |
| IMG_UINT32 ui32Time = ui64EnqueuedTime < ui64ExecuteTime ? |
| ui64ExecuteTime - ui64EnqueuedTime : |
| ui64EnqueuedTime - ui64ExecuteTime; |
| if (gsCwq.ui32TotalExecOps > 2 && ui32Time) |
| { |
| gsCwq.ui32AvgExecTime -= (gsCwq.ui32AvgExecTime / gsCwq.ui32TotalExecOps); |
| gsCwq.ui32AvgExecTime += (ui32Time / gsCwq.ui32TotalExecOps); |
| } |
| else if (ui32Time) |
| { |
| gsCwq.ui32AvgExecTime = (IMG_UINT32)ui32Time; |
| } |
| } |
| |
| if (! gsCwq.asStatsExecuted[i32WriteOffset].bKMReq) |
| { |
| /* Only UM CacheOps are recorded in the per-PID process statistics database */ |
| PVRSRVStatsUpdateCacheOpStats(gsCwq.asStatsExecuted[i32WriteOffset].uiCacheOp, |
| gsCwq.asStatsExecuted[i32WriteOffset].ui32OpSeqNum, |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| gsCwq.asStatsExecuted[i32WriteOffset].sDevVAddr, |
| gsCwq.asStatsExecuted[i32WriteOffset].sDevPAddr, |
| gsCwq.asStatsExecuted[i32WriteOffset].eFenceOpType, |
| #endif |
| gsCwq.asStatsExecuted[i32WriteOffset].uiOffset, |
| gsCwq.asStatsExecuted[i32WriteOffset].uiSize, |
| ui64EnqueuedTime < ui64ExecuteTime ? |
| ui64ExecuteTime - ui64EnqueuedTime: |
| ui64EnqueuedTime - ui64ExecuteTime, |
| gsCwq.asStatsExecuted[i32WriteOffset].bRBF, |
| gsCwq.asStatsExecuted[i32WriteOffset].bUMF, |
| gsCwq.asStatsExecuted[i32WriteOffset].bIsFence, |
| psCacheOpWorkItem->pid); |
| } |
| |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| e0: |
| #endif |
| OSLockRelease(gsCwq.hStatsExecLock); |
| } |
| |
| static void CacheOpStatsExecLogRead(void *pvFilePtr, void *pvData, |
| OS_STATS_PRINTF_FUNC* pfnOSStatsPrintf) |
| { |
| IMG_CHAR *pszFlushype; |
| IMG_CHAR *pszCacheOpType; |
| IMG_CHAR *pszFlushSource; |
| IMG_INT32 i32ReadOffset; |
| IMG_INT32 i32WriteOffset; |
| IMG_UINT64 ui64EnqueuedTime; |
| IMG_UINT64 ui64DequeuedTime; |
| IMG_UINT64 ui64ExecuteTime; |
| IMG_CHAR szBuffer[CACHEOP_MAX_DEBUG_MESSAGE_LEN] = {0}; |
| PVR_UNREFERENCED_PARAMETER(pvData); |
| |
| OSLockAcquire(gsCwq.hStatsExecLock); |
| |
| pfnOSStatsPrintf(pvFilePtr, |
| "Primary CPU d-cache architecture: LSZ: 0x%d, URBF: %s, KGF: %s, KRBF: %s\n", |
| gsCwq.uiLineSize, |
| gsCwq.bSupportsUMFlush ? "Yes" : "No", |
| !gsCwq.bNoGlobalFlushImpl ? "Yes" : "No", |
| "Yes" /* KRBF mechanism always available */ |
| ); |
| |
| pfnOSStatsPrintf(pvFilePtr, |
| "Configuration: QSZ: %d, UKT: %d, KDFT: %d, KGFT: %d, KDF: %s, URBF: %s, KGF: %s, KRBF: %s\n", |
| CACHEOP_INDICES_MAX, |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD], |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD], |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD], |
| gsCwq.eConfig & CACHEOP_CONFIG_KDF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_URBF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_KGF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_KRBF ? "Yes" : "No" |
| ); |
| |
| pfnOSStatsPrintf(pvFilePtr, |
| "Summary: OP[F][TL] (tot.avg): %d.%d/%d.%d/%d, [KM][UM][A]SYNC: %d.%d/%d/%d, RBF (um/km): %d/%d, [D]GF (km): %d/%d, DSC (um/km): %d/%d\n", |
| gsCwq.ui32TotalExecOps, gsCwq.ui32AvgExecTime, gsCwq.ui32TotalFenceOps, gsCwq.ui32AvgFenceTime, gsCwq.ui32ServerDTL, |
| gsCwq.ui32ServerSync, gsCwq.ui32ServerSyncVA, gsCwq.ui32ClientSync, gsCwq.ui32ServerASync, |
| gsCwq.ui32ClientRBF, gsCwq.ui32ServerRBF, |
| gsCwq.ui32ServerDGF, gsCwq.ui32ServerGF, |
| gsCwq.ui32UMDiscards, gsCwq.ui32KMDiscards |
| ); |
| |
| CacheOpStatsExecLogHeader(szBuffer); |
| pfnOSStatsPrintf(pvFilePtr, "%s\n", szBuffer); |
| |
| i32WriteOffset = gsCwq.i32StatsExecWriteIdx; |
| for (i32ReadOffset = DECR_WRAP(i32WriteOffset); |
| i32ReadOffset != i32WriteOffset; |
| i32ReadOffset = DECR_WRAP(i32ReadOffset)) |
| { |
| if (!gsCwq.asStatsExecuted[i32ReadOffset].ui32OpSeqNum && |
| !gsCwq.asStatsExecuted[i32ReadOffset].uiCacheOp) |
| { |
| break; |
| } |
| |
| /* Convert from nano-seconds to micro-seconds */ |
| ui64ExecuteTime = gsCwq.asStatsExecuted[i32ReadOffset].ui64ExecuteTime; |
| ui64EnqueuedTime = gsCwq.asStatsExecuted[i32ReadOffset].ui64EnqueuedTime; |
| ui64DequeuedTime = gsCwq.asStatsExecuted[i32ReadOffset].ui64DequeuedTime; |
| ui64ExecuteTime = DivBy10(DivBy10(DivBy10(ui64ExecuteTime))); |
| ui64EnqueuedTime = DivBy10(DivBy10(DivBy10(ui64EnqueuedTime))); |
| ui64DequeuedTime = ui64DequeuedTime ? DivBy10(DivBy10(DivBy10(ui64DequeuedTime))) : 0; |
| |
| if (gsCwq.asStatsExecuted[i32ReadOffset].bIsFence) |
| { |
| IMG_CHAR *pszMode = ""; |
| IMG_CHAR *pszFenceType = ""; |
| pszCacheOpType = "Fence"; |
| |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| pszMode = gsCwq.asStatsExecuted[i32ReadOffset].uiCacheOp != PVRSRV_CACHE_OP_GLOBAL ? "" : " GF "; |
| switch (gsCwq.asStatsExecuted[i32ReadOffset].eFenceOpType) |
| { |
| case RGXFWIF_DM_GP: |
| pszFenceType = " GP/GF"; |
| break; |
| |
| case RGXFWIF_DM_TDM: |
| pszFenceType = " TDM "; |
| break; |
| |
| case RGXFWIF_DM_TA: |
| pszFenceType = " TA "; |
| break; |
| |
| case RGXFWIF_DM_3D: |
| pszFenceType = " PDM "; |
| break; |
| |
| case RGXFWIF_DM_CDM: |
| pszFenceType = " CDM "; |
| break; |
| |
| default: |
| CACHEOP_PVR_ASSERT(0); |
| break; |
| } |
| #else |
| /* The CacheOp fence operation also triggered a global cache flush operation */ |
| pszFenceType = |
| gsCwq.asStatsExecuted[i32ReadOffset].uiCacheOp != PVRSRV_CACHE_OP_GLOBAL ? "" : " GF "; |
| #endif |
| pfnOSStatsPrintf(pvFilePtr, |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| CACHEOP_RI_PRINTF, |
| #else |
| CACHEOP_PRINTF, |
| #endif |
| gsCwq.asStatsExecuted[i32ReadOffset].pid, |
| pszCacheOpType, |
| pszFenceType, |
| pszMode, |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| "", |
| "", |
| #endif |
| gsCwq.asStatsExecuted[i32ReadOffset].uiOffset, |
| gsCwq.asStatsExecuted[i32ReadOffset].uiSize, |
| ui64EnqueuedTime < ui64ExecuteTime ? |
| ui64ExecuteTime - ui64EnqueuedTime |
| : |
| ui64EnqueuedTime - ui64ExecuteTime, |
| ui64EnqueuedTime < ui64DequeuedTime ? |
| ui64DequeuedTime - ui64EnqueuedTime |
| : |
| !ui64DequeuedTime ? 0 : ui64EnqueuedTime - ui64DequeuedTime, |
| gsCwq.asStatsExecuted[i32ReadOffset].ui32OpSeqNum); |
| } |
| else |
| { |
| if (gsCwq.asStatsExecuted[i32ReadOffset].bRBF) |
| { |
| IMG_DEVMEM_SIZE_T ui64NumOfPages; |
| |
| ui64NumOfPages = gsCwq.asStatsExecuted[i32ReadOffset].uiSize >> gsCwq.uiPageShift; |
| if (ui64NumOfPages <= PMR_MAX_TRANSLATION_STACK_ALLOC) |
| { |
| pszFlushype = "RBF.Fast"; |
| } |
| else |
| { |
| pszFlushype = "RBF.Slow"; |
| } |
| } |
| else |
| { |
| pszFlushype = " GF "; |
| } |
| |
| if (gsCwq.asStatsExecuted[i32ReadOffset].bUMF) |
| { |
| pszFlushSource = " UM"; |
| } |
| else |
| { |
| /* |
| - Request originates directly from a KM thread or in KM (KM<), or |
| - Request originates from a UM thread and is KM deferred (KM+), or |
| - Request is/was discarded due to an 'else-[when,where]' GFlush |
| - i.e. GF occurs either (a)sync to current UM/KM thread |
| */ |
| pszFlushSource = |
| gsCwq.asStatsExecuted[i32ReadOffset].bKMReq ? " KM<" : |
| gsCwq.asStatsExecuted[i32ReadOffset].bDeferred && gsCwq.asStatsExecuted[i32ReadOffset].ui64ExecuteTime ? " KM+" : |
| !gsCwq.asStatsExecuted[i32ReadOffset].ui64ExecuteTime ? " KM-" : " KM"; |
| } |
| |
| switch (gsCwq.asStatsExecuted[i32ReadOffset].uiCacheOp) |
| { |
| case PVRSRV_CACHE_OP_NONE: |
| pszCacheOpType = "None"; |
| break; |
| case PVRSRV_CACHE_OP_CLEAN: |
| pszCacheOpType = "Clean"; |
| break; |
| case PVRSRV_CACHE_OP_INVALIDATE: |
| pszCacheOpType = "Invalidate"; |
| break; |
| case PVRSRV_CACHE_OP_FLUSH: |
| pszCacheOpType = "Flush"; |
| break; |
| case PVRSRV_CACHE_OP_GLOBAL: |
| pszCacheOpType = "GFlush"; |
| break; |
| case PVRSRV_CACHE_OP_TIMELINE: |
| pszCacheOpType = "Timeline"; |
| pszFlushype = " "; |
| break; |
| default: |
| if ((IMG_UINT32)gsCwq.asStatsExecuted[i32ReadOffset].uiCacheOp == (IMG_UINT32)(PVRSRV_CACHE_OP_GLOBAL|PVRSRV_CACHE_OP_TIMELINE)) |
| { |
| pszCacheOpType = "Timeline"; |
| } |
| else |
| { |
| pszCacheOpType = "Unknown"; |
| gsCwq.asStatsExecuted[i32ReadOffset].ui32OpSeqNum = |
| (IMG_UINT32) gsCwq.asStatsExecuted[i32ReadOffset].uiCacheOp; |
| } |
| break; |
| } |
| |
| pfnOSStatsPrintf(pvFilePtr, |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| CACHEOP_RI_PRINTF, |
| #else |
| CACHEOP_PRINTF, |
| #endif |
| gsCwq.asStatsExecuted[i32ReadOffset].pid, |
| pszCacheOpType, |
| pszFlushype, |
| pszFlushSource, |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| gsCwq.asStatsExecuted[i32ReadOffset].sDevVAddr.uiAddr, |
| gsCwq.asStatsExecuted[i32ReadOffset].sDevPAddr.uiAddr, |
| #endif |
| gsCwq.asStatsExecuted[i32ReadOffset].uiOffset, |
| gsCwq.asStatsExecuted[i32ReadOffset].uiSize, |
| ui64EnqueuedTime < ui64ExecuteTime ? |
| ui64ExecuteTime - ui64EnqueuedTime |
| : |
| ui64EnqueuedTime - ui64ExecuteTime, |
| ui64EnqueuedTime < ui64DequeuedTime ? |
| ui64DequeuedTime - ui64EnqueuedTime |
| : |
| !ui64DequeuedTime ? 0 : ui64EnqueuedTime - ui64DequeuedTime, |
| gsCwq.asStatsExecuted[i32ReadOffset].ui32OpSeqNum); |
| } |
| } |
| |
| OSLockRelease(gsCwq.hStatsExecLock); |
| } |
| #endif /* defined(CACHEOP_DEBUG) */ |
| |
| static INLINE void CacheOpStatsReset(void) |
| { |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32KMDiscards = 0; |
| gsCwq.ui32UMDiscards = 0; |
| gsCwq.ui32TotalExecOps = 0; |
| gsCwq.ui32TotalFenceOps = 0; |
| gsCwq.ui32AvgExecTime = 0; |
| gsCwq.ui32AvgFenceTime = 0; |
| gsCwq.ui32ClientRBF = 0; |
| gsCwq.ui32ClientSync = 0; |
| gsCwq.ui32ServerRBF = 0; |
| gsCwq.ui32ServerASync = 0; |
| gsCwq.ui32ServerSyncVA = 0; |
| gsCwq.ui32ServerSync = 0; |
| gsCwq.ui32ServerGF = 0; |
| gsCwq.ui32ServerDGF = 0; |
| gsCwq.ui32ServerDTL = 0; |
| gsCwq.i32StatsExecWriteIdx = 0; |
| OSCachedMemSet(gsCwq.asStatsExecuted, 0, sizeof(gsCwq.asStatsExecuted)); |
| #endif |
| } |
| |
| static void CacheOpConfigUpdate(IMG_UINT32 ui32Config) |
| { |
| OSLockAcquire(gsCwq.hConfigLock); |
| |
| /* Step 0, set the gsCwq.eConfig bits */ |
| if (!(ui32Config & (CACHEOP_CONFIG_LAST - 1))) |
| { |
| gsCwq.eConfig = CACHEOP_CONFIG_KRBF | CACHEOP_CONFIG_KDF; |
| if (! gsCwq.bNoGlobalFlushImpl) |
| { |
| gsCwq.eConfig |= CACHEOP_CONFIG_KGF; |
| } |
| if (gsCwq.bSupportsUMFlush) |
| { |
| gsCwq.eConfig |= CACHEOP_CONFIG_URBF; |
| } |
| } |
| else |
| { |
| if (ui32Config & CACHEOP_CONFIG_KRBF) |
| { |
| gsCwq.eConfig |= CACHEOP_CONFIG_KRBF; |
| } |
| else |
| { |
| gsCwq.eConfig &= ~CACHEOP_CONFIG_KRBF; |
| } |
| |
| if (ui32Config & CACHEOP_CONFIG_KDF) |
| { |
| gsCwq.eConfig |= CACHEOP_CONFIG_KDF; |
| } |
| else |
| { |
| gsCwq.eConfig &= ~CACHEOP_CONFIG_KDF; |
| } |
| |
| if (!gsCwq.bNoGlobalFlushImpl && (ui32Config & CACHEOP_CONFIG_KGF)) |
| { |
| gsCwq.eConfig |= CACHEOP_CONFIG_KGF; |
| } |
| else |
| { |
| gsCwq.eConfig &= ~CACHEOP_CONFIG_KGF; |
| } |
| |
| if (gsCwq.bSupportsUMFlush && (ui32Config & CACHEOP_CONFIG_URBF)) |
| { |
| gsCwq.eConfig |= CACHEOP_CONFIG_URBF; |
| } |
| else |
| { |
| gsCwq.eConfig &= ~CACHEOP_CONFIG_URBF; |
| } |
| } |
| |
| if (ui32Config & CACHEOP_CONFIG_KLOG) |
| { |
| /* Enable logging of KM (server) originated CacheOps in the stats history */ |
| gsCwq.eConfig |= CACHEOP_CONFIG_KLOG; |
| } |
| else |
| { |
| gsCwq.eConfig &= ~CACHEOP_CONFIG_KLOG; |
| } |
| |
| /* Step 1, set gsCwq.ui32Config based on gsCwq.eConfig */ |
| ui32Config = 0; |
| if (gsCwq.eConfig & CACHEOP_CONFIG_KRBF) |
| { |
| ui32Config |= CACHEOP_CONFIG_KRBF; |
| } |
| if (gsCwq.eConfig & CACHEOP_CONFIG_KDF) |
| { |
| ui32Config |= CACHEOP_CONFIG_KDF; |
| } |
| if (gsCwq.eConfig & CACHEOP_CONFIG_KGF) |
| { |
| ui32Config |= CACHEOP_CONFIG_KGF; |
| } |
| if (gsCwq.eConfig & CACHEOP_CONFIG_URBF) |
| { |
| ui32Config |= CACHEOP_CONFIG_URBF; |
| } |
| if (gsCwq.eConfig & CACHEOP_CONFIG_KLOG) |
| { |
| ui32Config |= CACHEOP_CONFIG_KLOG; |
| } |
| gsCwq.ui32Config = ui32Config; |
| |
| /* Step 2, Bar RBF promotion to GF, unless a GF is implemented */ |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD] = (IMG_UINT32)~0; |
| if (! gsCwq.bNoGlobalFlushImpl) |
| { |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD] = (IMG_UINT32)PVR_DIRTY_BYTES_FLUSH_THRESHOLD; |
| } |
| |
| /* Step 3, in certain cases where a CacheOp/VA is provided, this threshold determines the point at which |
| the optimisation afforded by the presence of said VA (i.e. not having to remap the PMR pages in KM) |
| is clawed back because of the overhead of maintaining such a large request, which might stall the |
| user thread; to hide this latency, such CacheOps are executed on the deferred CacheOp thread */ |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD] = (IMG_UINT32)(PVR_DIRTY_BYTES_FLUSH_THRESHOLD >> 2); |
| |
| /* Step 4, if there is no UM support, all requests are done in KM, so zero these, forcing all client |
| requests to come down into the KM for maintenance */ |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD] = 0; |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMRBFONLY] = 0; |
| if (gsCwq.bSupportsUMFlush) |
| { |
| /* If URBF has been selected exclusively OR selected but there is no GF implementation */ |
| if ((gsCwq.eConfig & CACHEOP_CONFIG_URBF) && |
| (gsCwq.bNoGlobalFlushImpl || !((gsCwq.ui32Config & (CACHEOP_CONFIG_LAST-1)) & ~CACHEOP_CONFIG_URBF))) |
| { |
| /* If only URBF has been selected, we simulate the absence of GF support; alternatively, having no GF |
| support at all means all client requests should be done in UM. In both cases, set this threshold |
| to the highest value to prevent any client requests coming down to the server for maintenance. */ |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD] = (IMG_UINT32)~0; |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMRBFONLY] = 1; |
| } |
| /* This is the default entry for setting the UM info. page entries */ |
| else if ((gsCwq.eConfig & CACHEOP_CONFIG_URBF) && !gsCwq.bNoGlobalFlushImpl) |
| { |
| /* Set the UM/KM threshold; all request sizes above this go to the server for GF maintenance _only_. |
| Because UM flushes already have the VA acquired, no cost is incurred in per-page (re)mapping |
| of the to-be-maintained PMR/page(s), as is the case with KM flushing, so disallow KDF */ |
| |
| /* Assume an average UM flush performance; anything above this should be promoted to GF. |
| For x86 UMA/LMA, we avoid KDF because remapping PMR/pages in KM might fail due to exhausted |
| or fragmented VMALLOC kernel VA space */ |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD] = gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD]; |
| } |
| } |
| |
| /* Step 5, reset stats. */ |
| CacheOpStatsReset(); |
| |
| OSLockRelease(gsCwq.hConfigLock); |
| } |
| |
| static INLINE void CacheOpConfigRead(void *pvFilePtr, |
| void *pvData, |
| OS_STATS_PRINTF_FUNC* pfnOSStatsPrintf) |
| { |
| PVR_UNREFERENCED_PARAMETER(pvData); |
| pfnOSStatsPrintf(pvFilePtr, |
| "KDF: %s, URBF: %s, KGF: %s, KRBF: %s\n", |
| gsCwq.eConfig & CACHEOP_CONFIG_KDF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_URBF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_KGF ? "Yes" : "No", |
| gsCwq.eConfig & CACHEOP_CONFIG_KRBF ? "Yes" : "No" |
| ); |
| } |
| |
| static INLINE PVRSRV_ERROR CacheOpConfigQuery(const PVRSRV_DEVICE_NODE *psDevNode, |
| const void *psPrivate, |
| IMG_UINT32 *pui32Value) |
| { |
| IMG_UINT32 ui32ID = (IMG_UINT32)(uintptr_t) psPrivate; |
| PVR_UNREFERENCED_PARAMETER(psDevNode); |
| |
| switch (ui32ID) |
| { |
| case APPHINT_ID_CacheOpConfig: |
| *pui32Value = gsCwq.ui32Config; |
| break; |
| |
| case APPHINT_ID_CacheOpGFThresholdSize: |
| *pui32Value = gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD]; |
| break; |
| |
| case APPHINT_ID_CacheOpUMKMThresholdSize: |
| *pui32Value = gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD]; |
| break; |
| |
| default: |
| break; |
| } |
| |
| return PVRSRV_OK; |
| } |
| |
| static INLINE PVRSRV_ERROR CacheOpConfigSet(const PVRSRV_DEVICE_NODE *psDevNode, |
| const void *psPrivate, |
| IMG_UINT32 ui32Value) |
| { |
| IMG_UINT32 ui32ID = (IMG_UINT32)(uintptr_t) psPrivate; |
| PVR_UNREFERENCED_PARAMETER(psDevNode); |
| |
| switch (ui32ID) |
| { |
| case APPHINT_ID_CacheOpConfig: |
| CacheOpConfigUpdate(ui32Value & CACHEOP_CONFIG_ALL); |
| break; |
| |
| case APPHINT_ID_CacheOpGFThresholdSize: |
| { |
| if (!ui32Value || gsCwq.bNoGlobalFlushImpl) |
| { |
| /* CPU ISA does not support GF, silently ignore request to adjust threshold */ |
| PVR_ASSERT(gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD] == (IMG_UINT32)~0); |
| break; |
| } |
| else if (ui32Value < gsCwq.uiPageSize) |
| { |
| /* Silently round-up to OS page size */ |
| ui32Value = gsCwq.uiPageSize; |
| } |
| |
| /* Align to OS page size */ |
| ui32Value &= ~(gsCwq.uiPageSize - 1); |
| |
| /* Adjust KM deferred threshold given this updated KM global threshold */ |
| if (ui32Value == gsCwq.uiPageSize || ui32Value < gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD]) |
| { |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD] = ui32Value >> 2; |
| } |
| |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD] = ui32Value; |
| |
| break; |
| } |
| |
| case APPHINT_ID_CacheOpUMKMThresholdSize: |
| { |
| if (!ui32Value || !gsCwq.bSupportsUMFlush) |
| { |
| /* CPU ISA does not support UM flush, therefore every request goes down into |
| the KM, silently ignore request to adjust threshold */ |
| PVR_ASSERT(! gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD]); |
| break; |
| } |
| else if (ui32Value < gsCwq.uiPageSize) |
| { |
| /* Silently round-up to OS page size */ |
| ui32Value = gsCwq.uiPageSize; |
| } |
| |
| /* Align to OS page size */ |
| ui32Value &= ~(gsCwq.uiPageSize - 1); |
| |
| if (gsCwq.bNoGlobalFlushImpl || ui32Value < gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD]) |
| { |
| /* The CPU ISA does not support GF either, so there is no benefit in re-routing this to KM |
| as the request won't be promoted to GF; the request can still benefit from KM async. |
| execution, so ensure the KM deferred threshold applies */ |
| PVR_ASSERT(gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD] == (IMG_UINT32)~0); |
| gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD] = ui32Value >> 1; |
| break; |
| } |
| |
| gsCwq.pui32InfoPage[CACHEOP_INFO_UMKMTHRESHLD] = ui32Value; |
| |
| break; |
| } |
| |
| default: |
| break; |
| } |
| |
| return PVRSRV_OK; |
| } |
| |
| static INLINE void CacheOpQItemRecycle(CACHEOP_WORK_ITEM *psCacheOpWorkItem) |
| { |
| PVRSRV_ERROR eError; |
| eError = PMRUnlockSysPhysAddresses(psCacheOpWorkItem->psPMR); |
| PVR_LOG_IF_ERROR(eError, "PMRUnlockSysPhysAddresses"); |
| /* Set to max as a precaution: should recycling this CacheOp index fail |
| to reset it, this safeguards against having to discard such subsequent |
| deferred CacheOps or signal the sw sync timeline */ |
| psCacheOpWorkItem->iTimeline = PVRSRV_NO_TIMELINE; |
| psCacheOpWorkItem->ui32GFSeqNum = (IMG_UINT32)~0; |
| psCacheOpWorkItem->ui32OpSeqNum = (IMG_UINT32)~0; |
| #if defined(CACHEOP_DEBUG) |
| psCacheOpWorkItem->psPMR = (void *)(uintptr_t)~0; |
| #endif |
| } |
| |
| static INLINE void CacheOpQItemReadCheck(CACHEOP_WORK_ITEM *psCacheOpWorkItem) |
| { |
| #if defined(CACHEOP_DEBUG) |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->psPMR); |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->psPMR != (void *)(uintptr_t)~0); |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->ui32OpSeqNum != (IMG_UINT32)~0); |
| if (CacheOpConfigSupports(CACHEOP_CONFIG_KGF)) |
| { |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->ui32GFSeqNum != (IMG_UINT32)~0); |
| } |
| #else |
| PVR_UNREFERENCED_PARAMETER(psCacheOpWorkItem); |
| #endif |
| } |
| |
| static INLINE void CacheOpQItemWriteCheck(CACHEOP_WORK_ITEM *psCacheOpWorkItem) |
| { |
| #if defined(CACHEOP_DEBUG) |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->psPMR == (void *)(uintptr_t)~0); |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->ui32OpSeqNum == (IMG_UINT32)~0); |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->ui32GFSeqNum == (IMG_UINT32)~0); |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->iTimeline == PVRSRV_NO_TIMELINE); |
| #else |
| PVR_UNREFERENCED_PARAMETER(psCacheOpWorkItem); |
| #endif |
| } |
| |
| static INLINE IMG_UINT32 CacheOpGetNextCommonSeqNum(void) |
| { |
| IMG_UINT32 ui32SeqNum = OSAtomicIncrement(&gsCwq.hCommonSeqNum); |
| if (! ui32SeqNum) |
| { |
| /* Zero is _not_ a valid sequence value; this simplifies _all_ subsequent |
| fence checking when no cache maintenance operation is outstanding, as |
| in that case a fence value of zero is supplied. */ |
| if (CacheOpConfigSupports(CACHEOP_CONFIG_KGF)) |
| { |
| /* Also, when the seqNum wraps around/crosses zero, we must ensure that |
| GFSEQNUM is not erroneously higher than any/all client seqNum(s) in the |
| system during this wrap-around transition, so we momentarily zero both |
| info-page GF seqNum entries until the next GF comes along. This has the |
| effect that all subsequent in-flight discards using ">" are never true |
| (zero is _not_ greater than anything) and all "<=" comparisons are always |
| true (zero is always less than all non-zero integers). The additional GF |
| here mostly accounts for race condition(s) during this transition for all |
| pending seqNum(s) that are still behind zero. */ |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] = 0; |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM1] = 0; |
| ui32SeqNum = OSAtomicIncrement(&gsCwq.hCommonSeqNum); |
| (void) OSCPUOperation(PVRSRV_CACHE_OP_FLUSH); |
| } |
| else |
| { |
| ui32SeqNum = OSAtomicIncrement(&gsCwq.hCommonSeqNum); |
| } |
| } |
| return ui32SeqNum; |
| } |
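| |
| /* |
| Wrap-around sketch (illustrative, assuming no concurrent callers): when the |
| counter increments from 0xFFFFFFFF to 0 above, the zero value is skipped and |
| the caller is handed 1 instead; with KGF support the info-page GF seqNum |
| entries are zeroed and an extra global flush issued before returning, so no |
| stale GF sequence number can appear ahead of the freshly rebased client |
| seqNum(s). |
| */ |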
| |
| static INLINE IMG_BOOL CacheOpFenceCheck(IMG_UINT32 ui32CompletedSeqNum, |
| IMG_UINT32 ui32FenceSeqNum) |
| { |
| IMG_UINT32 ui32RebasedCompletedNum; |
| IMG_UINT32 ui32RebasedFenceNum; |
| IMG_UINT32 ui32Rebase; |
| |
| if (ui32FenceSeqNum == 0) |
| { |
| return IMG_TRUE; |
| } |
| |
| /* |
| The problem statement is how to compare two values on |
| a numerical sequentially incrementing timeline in the |
| presence of wrap around arithmetic semantics using a |
| single ui32 counter & atomic (increment) operations. |
| |
| The rationale for the solution here is to rebase the |
| incoming values to the sequence midpoint and perform |
| comparisons there; this allows us to handle overflow |
| or underflow wrap-round using only a single integer. |
| |
| NOTE: Here we assume that the absolute value of the |
| difference between the two incoming values is _not_ |
| greater than CACHEOP_SEQ_MIDPOINT. This assumption |
| holds as it implies that it is very _unlikely_ that 2 |
| billion CacheOp requests could have been made between |
| a single client's CacheOp request & the corresponding |
| fence check. This code sequence is hopefully a _more_ |
| hand optimised (branchless) version of this: |
| |
| x = ui32CompletedOpSeqNum |
| y = ui32FenceOpSeqNum |
| |
| if (|x - y| < CACHEOP_SEQ_MIDPOINT) |
| return (x - y) >= 0 ? true : false |
| else |
| return (y - x) >= 0 ? true : false |
| */ |
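| |
| /* Worked example (illustrative): with ui32CompletedSeqNum = 0x00000002 (the |
| counter has recently wrapped) and ui32FenceSeqNum = 0xFFFFFFFE (issued just |
| before the wrap), ui32Rebase = 0x7FFFFFFD; the completed number rebases to |
| 0x7FFFFFFF and the fence number to 0x7FFFFFFB (modulo 2^32), so the fence |
| correctly reports as satisfied even though a naive unsigned compare of the |
| raw values would conclude otherwise. */ |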
| ui32Rebase = CACHEOP_SEQ_MIDPOINT - ui32CompletedSeqNum; |
| |
| /* ui32Rebase could be either positive or negative; in |
| either case we still perform the operation using unsigned |
| semantics, as 2's complement notation means we always |
| end up with the correct result */ |
| ui32RebasedCompletedNum = ui32Rebase + ui32CompletedSeqNum; |
| ui32RebasedFenceNum = ui32Rebase + ui32FenceSeqNum; |
| |
| return (ui32RebasedCompletedNum >= ui32RebasedFenceNum); |
| } |
| |
| static INLINE PVRSRV_ERROR CacheOpTimelineBind(PVRSRV_DEVICE_NODE *psDevNode, |
| CACHEOP_WORK_ITEM *psCacheOpWorkItem, |
| PVRSRV_TIMELINE iTimeline) |
| { |
| PVRSRV_ERROR eError; |
| |
| /* Always default the incoming CacheOp work-item to safe values */ |
| SyncClearTimelineObj(&psCacheOpWorkItem->sSWTimelineObj); |
| psCacheOpWorkItem->iTimeline = PVRSRV_NO_TIMELINE; |
| psCacheOpWorkItem->psDevNode = psDevNode; |
| if (iTimeline == PVRSRV_NO_TIMELINE) |
| { |
| return PVRSRV_OK; |
| } |
| |
| #if defined(PVR_USE_FENCE_SYNC_MODEL) |
| psCacheOpWorkItem->iTimeline = iTimeline; |
| eError = SyncSWGetTimelineObj(iTimeline, &psCacheOpWorkItem->sSWTimelineObj); |
| PVR_LOG_IF_ERROR(eError, "SyncSWGetTimelineObj"); |
| #else |
| eError = PVRSRV_ERROR_NOT_IMPLEMENTED; |
| #endif |
| |
| return eError; |
| } |
| |
| static INLINE PVRSRV_ERROR CacheOpTimelineExec(CACHEOP_WORK_ITEM *psCacheOpWorkItem) |
| { |
| PVRSRV_ERROR eError; |
| |
| if (psCacheOpWorkItem->iTimeline == PVRSRV_NO_TIMELINE) |
| { |
| return PVRSRV_OK; |
| } |
| CACHEOP_PVR_ASSERT(psCacheOpWorkItem->sSWTimelineObj.pvTlObj); |
| |
| #if defined(PVR_USE_FENCE_SYNC_MODEL) |
| eError = SyncSWTimelineAdvanceKM(psCacheOpWorkItem->psDevNode, |
| &psCacheOpWorkItem->sSWTimelineObj); |
| (void) SyncSWTimelineReleaseKM(&psCacheOpWorkItem->sSWTimelineObj); |
| #else |
| eError = PVRSRV_ERROR_NOT_IMPLEMENTED; |
| #endif |
| |
| return eError; |
| } |
| |
| static INLINE PVRSRV_ERROR CacheOpGlobalFlush(void) |
| { |
| #if !defined(CACHEFLUSH_ISA_SUPPORTS_GLOBAL_FLUSH) |
| return PVRSRV_ERROR_NOT_IMPLEMENTED; |
| #else |
| PVRSRV_ERROR eError; |
| IMG_UINT32 ui32OpSeqNum = gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| |
| if (! CacheOpConfigSupports(CACHEOP_CONFIG_KGF)) |
| { |
| return PVRSRV_ERROR_NOT_SUPPORTED; |
| } |
| |
| OSLockAcquire(gsCwq.hGlobalFlushLock); |
| if (ui32OpSeqNum < gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]) |
| { |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32KMDiscards += 1; |
| #endif |
| eError = PVRSRV_OK; |
| goto exit; |
| } |
| |
| /* User space sampling the information-page seqNumbers after this point |
| and before the corresponding GFSEQNUM0 update leads to an invalid |
| sampling which must be discarded by UM. This implements a lockless |
| critical region for a single KM(writer) & multiple UM/KM(readers) */ |
| ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM1] = ui32OpSeqNum; |
| |
| eError = OSCPUOperation(PVRSRV_CACHE_OP_FLUSH); |
| PVR_LOGG_IF_ERROR(eError, "OSCPUOperation(PVRSRV_CACHE_OP_FLUSH)", exit); |
| |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] = ui32OpSeqNum; |
| OSAtomicWrite(&gsCwq.hDeferredSize, 0); |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32ServerGF += 1; |
| #endif |
| |
| exit: |
| OSLockRelease(gsCwq.hGlobalFlushLock); |
| return eError; |
| #endif |
| } |
| |
| static INLINE void CacheOpExecRangeBased(PVRSRV_DEVICE_NODE *psDevNode, |
| PVRSRV_CACHE_OP uiCacheOp, |
| IMG_BYTE *pbCpuVirtAddr, |
| IMG_CPU_PHYADDR sCpuPhyAddr, |
| IMG_DEVMEM_OFFSET_T uiPgAlignedOffset, |
| IMG_DEVMEM_OFFSET_T uiCLAlignedStartOffset, |
| IMG_DEVMEM_OFFSET_T uiCLAlignedEndOffset) |
| { |
| IMG_BYTE *pbCpuVirtAddrEnd; |
| IMG_BYTE *pbCpuVirtAddrStart; |
| IMG_CPU_PHYADDR sCpuPhyAddrEnd; |
| IMG_CPU_PHYADDR sCpuPhyAddrStart; |
| IMG_DEVMEM_SIZE_T uiRelFlushSize; |
| IMG_DEVMEM_OFFSET_T uiRelFlushOffset; |
| IMG_DEVMEM_SIZE_T uiNextPgAlignedOffset; |
| |
| /* These quantities allow us to perform cache operations |
| at cache-line granularity, thereby ensuring we do not |
| do more maintenance than is necessary */ |
| CACHEOP_PVR_ASSERT(uiPgAlignedOffset < uiCLAlignedEndOffset); |
| uiRelFlushSize = (IMG_DEVMEM_SIZE_T)gsCwq.uiPageSize; |
| uiRelFlushOffset = 0; |
| |
| if (uiCLAlignedStartOffset > uiPgAlignedOffset) |
| { |
| /* Zero unless initially starting at an in-page offset */ |
| uiRelFlushOffset = uiCLAlignedStartOffset - uiPgAlignedOffset; |
| uiRelFlushSize -= uiRelFlushOffset; |
| } |
| |
| /* uiRelFlushSize is gsCwq.uiPageSize unless the current outstanding CacheOp |
| size is smaller. The 1st case handles an in-page CacheOp range and |
| the 2nd case handles a multiple-page CacheOp range whose last |
| CacheOp size is less than gsCwq.uiPageSize */ |
| uiNextPgAlignedOffset = uiPgAlignedOffset + (IMG_DEVMEM_SIZE_T)gsCwq.uiPageSize; |
| if (uiNextPgAlignedOffset < uiPgAlignedOffset) |
| { |
| /* uiNextPgAlignedOffset is greater than uiCLAlignedEndOffset |
| by implication of this wrap-round; this only happens when |
| uiPgAlignedOffset is the last page aligned offset */ |
| uiRelFlushSize = uiRelFlushOffset ? |
| uiCLAlignedEndOffset - uiCLAlignedStartOffset : |
| uiCLAlignedEndOffset - uiPgAlignedOffset; |
| } |
| else |
| { |
| if (uiNextPgAlignedOffset > uiCLAlignedEndOffset) |
| { |
| uiRelFlushSize = uiRelFlushOffset ? |
| uiCLAlignedEndOffset - uiCLAlignedStartOffset : |
| uiCLAlignedEndOffset - uiPgAlignedOffset; |
| } |
| } |
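| |
| /* Illustrative numbers (assumed 4K page): for a cache-line aligned CacheOp |
| spanning offsets [0x0C0, 0x2340), successive per-page calls compute |
| uiRelFlushOffset/uiRelFlushSize of 0x0C0/0xF40 for page offset 0x0000, |
| 0x000/0x1000 for page offset 0x1000, and 0x000/0x340 for the final page |
| offset 0x2000, so only the bytes actually covered by the request are |
| maintained. */ |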
| |
| /* More efficient to request cache maintenance operation for full |
| relative range as opposed to multiple cache-aligned ranges */ |
| sCpuPhyAddrStart.uiAddr = sCpuPhyAddr.uiAddr + uiRelFlushOffset; |
| sCpuPhyAddrEnd.uiAddr = sCpuPhyAddrStart.uiAddr + uiRelFlushSize; |
| if (pbCpuVirtAddr) |
| { |
| pbCpuVirtAddrStart = pbCpuVirtAddr + uiRelFlushOffset; |
| pbCpuVirtAddrEnd = pbCpuVirtAddrStart + uiRelFlushSize; |
| } |
| else |
| { |
| /* Some OS/Env layer support functions expect NULL(s) */ |
| pbCpuVirtAddrStart = NULL; |
| pbCpuVirtAddrEnd = NULL; |
| } |
| |
| /* Perform requested CacheOp on the CPU data cache for successive cache |
| line worth of bytes up to page or in-page cache-line boundary */ |
| switch (uiCacheOp) |
| { |
| case PVRSRV_CACHE_OP_CLEAN: |
| OSCPUCacheCleanRangeKM(psDevNode, pbCpuVirtAddrStart, pbCpuVirtAddrEnd, |
| sCpuPhyAddrStart, sCpuPhyAddrEnd); |
| break; |
| case PVRSRV_CACHE_OP_INVALIDATE: |
| OSCPUCacheInvalidateRangeKM(psDevNode, pbCpuVirtAddrStart, pbCpuVirtAddrEnd, |
| sCpuPhyAddrStart, sCpuPhyAddrEnd); |
| break; |
| case PVRSRV_CACHE_OP_FLUSH: |
| OSCPUCacheFlushRangeKM(psDevNode, pbCpuVirtAddrStart, pbCpuVirtAddrEnd, |
| sCpuPhyAddrStart, sCpuPhyAddrEnd); |
| break; |
| default: |
| PVR_DPF((PVR_DBG_ERROR, "%s: Invalid cache operation type %d", |
| __func__, uiCacheOp)); |
| break; |
| } |
| |
| #if defined(CACHEOP_DEBUG) |
| /* Tracks the number of kernel-mode cacheline maintenance instructions */ |
| gsCwq.ui32ServerRBF += (uiRelFlushSize & ((IMG_DEVMEM_SIZE_T)~(gsCwq.uiLineSize - 1))) >> gsCwq.uiLineShift; |
| #endif |
| } |
| |
| static INLINE void CacheOpExecRangeBasedVA(PVRSRV_DEVICE_NODE *psDevNode, |
| IMG_CPU_VIRTADDR pvAddress, |
| IMG_DEVMEM_SIZE_T uiSize, |
| PVRSRV_CACHE_OP uiCacheOp) |
| { |
| IMG_CPU_PHYADDR sCpuPhyAddrUnused = |
| { IMG_CAST_TO_CPUPHYADDR_UINT(0xCAFEF00DDEADBEEFULL) }; |
| IMG_BYTE *pbEnd = (IMG_BYTE*)((uintptr_t)pvAddress + (uintptr_t)uiSize); |
| IMG_BYTE *pbStart = (IMG_BYTE*)((uintptr_t)pvAddress & ~((uintptr_t)gsCwq.uiLineSize-1)); |
| |
| /* |
| If the start/end addresses aren't aligned to the cache line size, round the |
| start down and the end up to the nearest cache-line boundary; this ensures |
| that we maintain all the cache lines affected by unaligned start/end addresses. |
| */ |
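| /* Numeric example (illustrative, assuming a 64-byte cache line): pvAddress = |
| 0x1030 with uiSize = 0x20 yields pbStart = 0x1000 and pbEnd = 0x1080, i.e. |
| the two cache lines touched by the unaligned range are both maintained. */ |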
| pbEnd = (IMG_BYTE *) PVR_ALIGN((uintptr_t)pbEnd, (uintptr_t)gsCwq.uiLineSize); |
| switch (uiCacheOp) |
| { |
| case PVRSRV_CACHE_OP_CLEAN: |
| OSCPUCacheCleanRangeKM(psDevNode, pbStart, pbEnd, sCpuPhyAddrUnused, sCpuPhyAddrUnused); |
| break; |
| case PVRSRV_CACHE_OP_INVALIDATE: |
| OSCPUCacheInvalidateRangeKM(psDevNode, pbStart, pbEnd, sCpuPhyAddrUnused, sCpuPhyAddrUnused); |
| break; |
| case PVRSRV_CACHE_OP_FLUSH: |
| OSCPUCacheFlushRangeKM(psDevNode, pbStart, pbEnd, sCpuPhyAddrUnused, sCpuPhyAddrUnused); |
| break; |
| default: |
| PVR_DPF((PVR_DBG_ERROR, "%s: Invalid cache operation type %d", |
| __func__, uiCacheOp)); |
| break; |
| } |
| |
| #if defined(CACHEOP_DEBUG) |
| /* Tracks the number of kernel-mode cacheline maintenance instructions */ |
| gsCwq.ui32ServerRBF += (uiSize & ((IMG_DEVMEM_SIZE_T)~(gsCwq.uiLineSize - 1))) >> gsCwq.uiLineShift; |
| #endif |
| } |
| |
| static INLINE PVRSRV_ERROR CacheOpValidateVAOffset(PMR *psPMR, |
| IMG_CPU_VIRTADDR pvAddress, |
| IMG_DEVMEM_OFFSET_T uiOffset, |
| IMG_DEVMEM_SIZE_T uiSize, |
| void **ppvOutAddress) |
| { |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| #if defined(LINUX) && !defined(CACHEFLUSH_NO_KMRBF_USING_UMVA) |
| struct mm_struct *mm = current->mm; |
| struct vm_area_struct *vma; |
| #endif |
| void __user *pvAddr; |
| |
| if (! pvAddress) |
| { |
| /* As pvAddress is optional, NULL is expected from UM/KM requests */ |
| pvAddr = NULL; |
| goto e0; |
| } |
| |
| #if !defined(LINUX) || defined(CACHEFLUSH_NO_KMRBF_USING_UMVA) |
| pvAddr = NULL; |
| #else |
| /* Validate VA, assume most basic address limit access_ok() check */ |
| pvAddr = (void __user *)(uintptr_t)((uintptr_t)pvAddress + uiOffset); |
| if (!access_ok(pvAddr, uiSize)) |
| { |
| pvAddr = NULL; |
| if (! mm) |
| { |
| /* Bad KM request, don't silently ignore */ |
| eError = PVRSRV_ERROR_INVALID_CPU_ADDR; |
| goto e0; |
| } |
| } |
| else if (mm) |
| { |
| down_read(&mm->mmap_sem); |
| |
| vma = find_vma(mm, (unsigned long)(uintptr_t)pvAddr); |
| if (!vma || |
| vma->vm_start > (unsigned long)(uintptr_t)pvAddr || |
| vma->vm_end - vma->vm_start > (unsigned long)(uintptr_t)uiSize) |
| { |
| /* Out of range mm_struct->vm_area VA */ |
| pvAddr = NULL; |
| } |
| else if (vma->vm_private_data != psPMR) |
| { |
| /* |
| Unknown mm_struct->vm_area VA; we can't risk d-cache maintenance using |
| this VA as the client user-space mapping could be removed without |
| us knowing, which might induce a CPU fault during cache maintenance. |
| */ |
| pvAddr = NULL; |
| } |
| else if ((uintptr_t)pvAddress < (uintptr_t)gsCwq.uiPageSize) |
| { |
| /* Silently suppress UM NULL page pointers */ |
| pvAddr = NULL; |
| } |
| |
| up_read(&mm->mmap_sem); |
| } |
| else |
| { |
| pgd_t *pgd; |
| p4d_t *p4d; |
| pud_t *pud; |
| pmd_t *pmd; |
| pte_t *ptep; |
| mm = current->active_mm; |
| |
| /* |
| For KM requests, perform additional VA validation by walking the |
| kernel page-table structures to be sure the VA is safe to use. |
| */ |
| pgd = pgd_offset(mm, (uintptr_t)pvAddr); |
| if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) |
| { |
| eError = PVRSRV_ERROR_INVALID_CPU_ADDR; |
| pvAddr = NULL; |
| goto e0; |
| } |
| |
| p4d = p4d_offset(pgd, (uintptr_t)pvAddr); |
| if (p4d_none(*p4d) || unlikely(p4d_bad(*p4d))) |
| { |
| eError = PVRSRV_ERROR_INVALID_CPU_ADDR; |
| pvAddr = NULL; |
| goto e0; |
| } |
| |
| pud = pud_offset(p4d, (uintptr_t)pvAddr); |
| if (pud_none(*pud) || unlikely(pud_bad(*pud))) |
| { |
| eError = PVRSRV_ERROR_INVALID_CPU_ADDR; |
| pvAddr = NULL; |
| goto e0; |
| } |
| |
| pmd = pmd_offset(pud, (uintptr_t)pvAddr); |
| if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) |
| { |
| eError = PVRSRV_ERROR_INVALID_CPU_ADDR; |
| pvAddr = NULL; |
| goto e0; |
| } |
| |
| ptep = pte_offset_map(pmd, (uintptr_t)pvAddr); |
| if (!ptep || !pte_present(*ptep)) |
| { |
| eError = PVRSRV_ERROR_INVALID_CPU_ADDR; |
| pvAddr = NULL; |
| goto e0; |
| } |
| } |
| #endif |
| |
| e0: |
| *ppvOutAddress = (IMG_CPU_VIRTADDR __force) pvAddr; |
| return eError; |
| } |
| |
| static PVRSRV_ERROR CacheOpPMRExec (PMR *psPMR, |
| IMG_CPU_VIRTADDR pvAddress, |
| IMG_DEVMEM_OFFSET_T uiOffset, |
| IMG_DEVMEM_SIZE_T uiSize, |
| PVRSRV_CACHE_OP uiCacheOp, |
| IMG_UINT32 ui32GFlushSeqNum, |
| IMG_BOOL bIsRequestValidated, |
| IMG_BOOL *pbUsedGlobalFlush) |
| { |
| IMG_HANDLE hPrivOut; |
| IMG_BOOL bPMRIsSparse; |
| IMG_UINT32 ui32PageIndex; |
| IMG_UINT32 ui32NumOfPages; |
| IMG_DEVMEM_SIZE_T uiOutSize; |
| PVRSRV_DEVICE_NODE *psDevNode; |
| IMG_DEVMEM_SIZE_T uiPgAlignedSize; |
| IMG_DEVMEM_OFFSET_T uiPgAlignedOffset; |
| IMG_DEVMEM_OFFSET_T uiCLAlignedEndOffset; |
| IMG_DEVMEM_OFFSET_T uiPgAlignedEndOffset; |
| IMG_DEVMEM_OFFSET_T uiCLAlignedStartOffset; |
| IMG_DEVMEM_OFFSET_T uiPgAlignedStartOffset; |
| IMG_BOOL abValid[PMR_MAX_TRANSLATION_STACK_ALLOC]; |
| IMG_CPU_PHYADDR asCpuPhyAddr[PMR_MAX_TRANSLATION_STACK_ALLOC]; |
| IMG_CPU_PHYADDR *psCpuPhyAddr = asCpuPhyAddr; |
| IMG_BOOL bIsPMRInfoValid = IMG_FALSE; |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| IMG_BYTE *pbCpuVirtAddr = NULL; |
| IMG_BOOL *pbValid = abValid; |
| *pbUsedGlobalFlush = IMG_FALSE; |
| |
| if (uiCacheOp == PVRSRV_CACHE_OP_NONE || uiCacheOp == PVRSRV_CACHE_OP_TIMELINE) |
| { |
| return PVRSRV_OK; |
| } |
| |
| /* Check for explicitly requested KGF requests, or KRBF requests promoted to KGF */ |
| if (uiCacheOp == PVRSRV_CACHE_OP_GLOBAL || uiSize == 0 || |
| (IMG_UINT32)uiSize > gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD]) |
| { |
| /* Discard if another (else-when) KGF has occurred in the interim */ |
| if (gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] > ui32GFlushSeqNum) |
| { |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32KMDiscards += 1; |
| #endif |
| return PVRSRV_OK; |
| } |
| /* Some CPU ISAs support KGF; if it fails, fall back to KRBF */ |
| else if ((eError = CacheOpGlobalFlush()) == PVRSRV_OK) |
| { |
| *pbUsedGlobalFlush = IMG_TRUE; |
| return PVRSRV_OK; |
| } |
| /* Request with uiSize=0 is treated as a KGF request as well */ |
| else if (uiCacheOp == PVRSRV_CACHE_OP_GLOBAL || uiSize == 0) |
| { |
| /* Cannot fall back to KRBF as an explicit KGF was erroneously requested */ |
| PVR_LOGR_IF_ERROR(eError, CACHEOP_NO_GFLUSH_ERROR_STRING); |
| CACHEOP_PVR_ASSERT(0); |
| } |
| } |
| |
| if (! bIsRequestValidated) |
| { |
| IMG_DEVMEM_SIZE_T uiLPhysicalSize; |
| |
| /* Need to validate parameters before proceeding */ |
| eError = PMR_PhysicalSize(psPMR, &uiLPhysicalSize); |
| PVR_LOGR_IF_ERROR(eError, "uiLPhysicalSize"); |
| |
| PVR_LOGR_IF_FALSE(((uiOffset+uiSize) <= uiLPhysicalSize), CACHEOP_DEVMEM_OOR_ERROR_STRING, PVRSRV_ERROR_DEVICEMEM_OUT_OF_RANGE); |
| |
| eError = PMRLockSysPhysAddresses(psPMR); |
| PVR_LOGR_IF_ERROR(eError, "PMRLockSysPhysAddresses"); |
| } |
| |
| /* Fast track the request if a CPU VA is provided and CPU ISA supports VA only maintenance */ |
| eError = CacheOpValidateVAOffset(psPMR, pvAddress, uiOffset, uiSize, (void**)&pbCpuVirtAddr); |
| if (eError == PVRSRV_OK) |
| { |
| pvAddress = pbCpuVirtAddr; |
| |
| if (pvAddress && gsCwq.uiCacheOpAddrType == PVRSRV_CACHE_OP_ADDR_TYPE_VIRTUAL) |
| { |
| CacheOpExecRangeBasedVA(PMR_DeviceNode(psPMR), pvAddress, uiSize, uiCacheOp); |
| if (! bIsRequestValidated) |
| { |
| eError = PMRUnlockSysPhysAddresses(psPMR); |
| PVR_LOG_IF_ERROR(eError, "PMRUnlockSysPhysAddresses"); |
| } |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32ServerSyncVA += 1; |
| #endif |
| return PVRSRV_OK; |
| } |
| else if (pvAddress) |
| { |
| /* Round the incoming VA (if any) down to the nearest page-aligned VA */ |
| pvAddress = (void*)((uintptr_t)pvAddress & ~((uintptr_t)gsCwq.uiPageSize-1)); |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32ServerSyncVA += 1; |
| #endif |
| } |
| } |
| else |
| { |
| /* |
| * This validation pathway accommodates requests that might otherwise cause the kernel |
| * to Oops. KM requests should prevalidate cache maintenance parameters; if that has not |
| * happened we would rather fail gracefully than Oops the kernel, so we log that an |
| * invalid KM virtual address was supplied together with whatever mitigating action, |
| * if any, was taken. |
| */ |
| CACHEOP_PVR_ASSERT(pbCpuVirtAddr == NULL); |
| |
| if (gsCwq.uiCacheOpAddrType == PVRSRV_CACHE_OP_ADDR_TYPE_PHYSICAL) |
| { |
| PVR_DPF((PVR_DBG_WARNING, |
| "%s: Invalid vaddress 0x%p in CPU d-cache maint. op, using paddress", |
| __func__, |
| pvAddress)); |
| |
| /* We can still proceed as kernel/cpu uses CPU PA for d-cache maintenance */ |
| pvAddress = NULL; |
| } |
| else if (CacheOpGlobalFlush() == PVRSRV_OK) |
| { |
| PVR_DPF((PVR_DBG_WARNING, |
| "%s: Invalid vaddress 0x%p in CPU d-cache maint. op, used global flush", |
| __func__, |
| pvAddress)); |
| |
| /* Saved by global flush impl. */ |
| *pbUsedGlobalFlush = IMG_TRUE; |
| eError = PVRSRV_OK; |
| goto e0; |
| } |
| else |
| { |
| /* |
| * The approach here is to reacquire the PMR kernel VA and compare it with the parameter |
| * VA. If they match, the requested cache maintenance op is failed, because this indicates |
| * some kind of internal memory and/or meta-data corruption; otherwise the request is |
| * reissued using the (re)acquired alias PMR kernel VA. |
| */ |
| if (PMR_IsSparse(psPMR)) |
| { |
| eError = PMRAcquireSparseKernelMappingData(psPMR, |
| 0, |
| gsCwq.uiPageSize, |
| (void **)&pbCpuVirtAddr, |
| (size_t*)&uiOutSize, |
| &hPrivOut); |
| PVR_LOGG_IF_ERROR(eError, "PMRAcquireSparseKernelMappingData", e0); |
| } |
| else |
| { |
| eError = PMRAcquireKernelMappingData(psPMR, |
| 0, |
| gsCwq.uiPageSize, |
| (void **)&pbCpuVirtAddr, |
| (size_t*)&uiOutSize, |
| &hPrivOut); |
| PVR_LOGG_IF_ERROR(eError, "PMRAcquireKernelMappingData", e0); |
| } |
| |
| /* Here, we compare these CPU virtual addresses only at the granularity of the OS page size */ |
| if ((uintptr_t)pbCpuVirtAddr == ((uintptr_t)pvAddress & ~((uintptr_t)gsCwq.uiPageSize-1))) |
| { |
| PVR_DPF((PVR_DBG_ERROR, |
| "%s: Invalid vaddress 0x%p in CPU d-cache maint. op, no alt. so failing request", |
| __func__, |
| pvAddress)); |
| |
| eError = PMRReleaseKernelMappingData(psPMR, hPrivOut); |
| PVR_LOG_IF_ERROR(eError, "PMRReleaseKernelMappingData"); |
| |
| eError = PVRSRV_ERROR_INVALID_CPU_ADDR; |
| goto e0; |
| } |
| else if (gsCwq.uiCacheOpAddrType == PVRSRV_CACHE_OP_ADDR_TYPE_VIRTUAL) |
| { |
| PVR_DPF((PVR_DBG_WARNING, |
| "%s: Bad vaddress 0x%p in CPU d-cache maint. op, using reacquired vaddress 0x%p", |
| __func__, |
| pvAddress, |
| pbCpuVirtAddr)); |
| |
| /* Note that this might still fail if there is kernel memory/meta-data corruption; |
| there is not much we can do here, but at least we will be informed of this |
| before the kernel Oopses */ |
| CacheOpExecRangeBasedVA(PMR_DeviceNode(psPMR), pbCpuVirtAddr, uiSize, uiCacheOp); |
| |
| eError = PMRReleaseKernelMappingData(psPMR, hPrivOut); |
| PVR_LOG_IF_ERROR(eError, "PMRReleaseKernelMappingData"); |
| |
| eError = PVRSRV_OK; |
| goto e0; |
| } |
| else |
| { |
| /* At this juncture we have exhausted every possible work-around, but we do know |
| that VA reacquisition returned another (alias) page-aligned VA; so, expecting |
| PMRAcquireKernelMappingData() to behave the same way downstream, we proceed */ |
| PVR_DPF((PVR_DBG_WARNING, |
| "%s: Bad vaddress %p in CPU d-cache maint. op, will use reacquired vaddress", |
| __func__, |
| pvAddress)); |
| |
| eError = PMRReleaseKernelMappingData(psPMR, hPrivOut); |
| PVR_LOG_IF_ERROR(eError, "PMRReleaseKernelMappingData"); |
| |
| /* NULL this to force per-page reacquisition down-stream */ |
| pvAddress = NULL; |
| } |
| } |
| } |
| |
| /* NULL the clobbered variable, OK to proceed */ |
| pbCpuVirtAddr = NULL; |
| eError = PVRSRV_OK; |
| |
| /* Need this for kernel mapping */ |
| bPMRIsSparse = PMR_IsSparse(psPMR); |
| psDevNode = PMR_DeviceNode(psPMR); |
| |
| /* Round the incoming offset down to the nearest cache-line / page-aligned address */ |
| uiCLAlignedEndOffset = uiOffset + uiSize; |
| uiCLAlignedEndOffset = PVR_ALIGN(uiCLAlignedEndOffset, (IMG_DEVMEM_SIZE_T)gsCwq.uiLineSize); |
| uiCLAlignedStartOffset = (uiOffset & ~((IMG_DEVMEM_OFFSET_T)gsCwq.uiLineSize-1)); |
| |
| uiPgAlignedEndOffset = uiCLAlignedEndOffset; |
| uiPgAlignedEndOffset = PVR_ALIGN(uiPgAlignedEndOffset, (IMG_DEVMEM_SIZE_T)gsCwq.uiPageSize); |
| uiPgAlignedStartOffset = (uiOffset & ~((IMG_DEVMEM_OFFSET_T)gsCwq.uiPageSize-1)); |
| uiPgAlignedSize = uiPgAlignedEndOffset - uiPgAlignedStartOffset; |
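| /* The CL-aligned offsets bound the maintenance range within each page, whilst the PG-aligned offsets drive the per-page PMR physical-address/validity look-ups below */ |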
| |
| #if defined(CACHEOP_NO_CACHE_LINE_ALIGNED_ROUNDING) |
| /* For internal debug if cache-line optimised |
| flushing is suspected of causing data corruption */ |
| uiCLAlignedStartOffset = uiPgAlignedStartOffset; |
| uiCLAlignedEndOffset = uiPgAlignedEndOffset; |
| #endif |
| |
| /* Determine the number of PMR pages spanned and whether the stack look-up arrays suffice */ |
| ui32NumOfPages = uiPgAlignedSize >> gsCwq.uiPageShift; |
| if (ui32NumOfPages > PMR_MAX_TRANSLATION_STACK_ALLOC) |
| { |
| /* The pbValid array is allocated first as it is needed in |
| both physical/virtual cache maintenance methods */ |
| pbValid = OSAllocZMem(ui32NumOfPages * sizeof(IMG_BOOL)); |
| if (! pbValid) |
| { |
| pbValid = abValid; |
| } |
| else if (gsCwq.uiCacheOpAddrType != PVRSRV_CACHE_OP_ADDR_TYPE_VIRTUAL) |
| { |
| psCpuPhyAddr = OSAllocZMem(ui32NumOfPages * sizeof(IMG_CPU_PHYADDR)); |
| if (! psCpuPhyAddr) |
| { |
| psCpuPhyAddr = asCpuPhyAddr; |
| OSFreeMem(pbValid); |
| pbValid = abValid; |
| } |
| } |
| } |
| |
| /* We always retrieve PMR data in bulk up-front if the number of pages is within the |
| PMR_MAX_TRANSLATION_STACK_ALLOC limit; otherwise we check that a dynamic buffer |
| was successfully allocated to satisfy the larger request */ |
| if (ui32NumOfPages <= PMR_MAX_TRANSLATION_STACK_ALLOC || pbValid != abValid) |
| { |
| if (gsCwq.uiCacheOpAddrType != PVRSRV_CACHE_OP_ADDR_TYPE_VIRTUAL) |
| { |
| /* Look-up PMR CpuPhyAddr once, if possible */ |
| eError = PMR_CpuPhysAddr(psPMR, |
| gsCwq.uiPageShift, |
| ui32NumOfPages, |
| uiPgAlignedStartOffset, |
| psCpuPhyAddr, |
| pbValid); |
| if (eError == PVRSRV_OK) |
| { |
| bIsPMRInfoValid = IMG_TRUE; |
| } |
| } |
| else |
| { |
| /* Look-up PMR per-page validity once, if possible */ |
| eError = PMR_IsOffsetValid(psPMR, |
| gsCwq.uiPageShift, |
| ui32NumOfPages, |
| uiPgAlignedStartOffset, |
| pbValid); |
| bIsPMRInfoValid = (eError == PVRSRV_OK) ? IMG_TRUE : IMG_FALSE; |
| } |
| } |
| |
| /* For each (possibly non-contiguous) PMR page(s), carry out the requested cache maint. op. */ |
| for (uiPgAlignedOffset = uiPgAlignedStartOffset, ui32PageIndex = 0; |
| uiPgAlignedOffset < uiPgAlignedEndOffset; |
| uiPgAlignedOffset += (IMG_DEVMEM_OFFSET_T) gsCwq.uiPageSize, ui32PageIndex += 1) |
| { |
| /* Just before issuing the CacheOp RBF, check if it can be discarded */ |
| if (gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] > ui32GFlushSeqNum) |
| { |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32KMDiscards += 1; |
| #endif |
| break; |
| } |
| |
| if (! bIsPMRInfoValid) |
| { |
| /* Never cross a page boundary without looking up the corresponding PMR page physical |
| address and/or page validity, if these were not looked up in bulk up-front */ |
| ui32PageIndex = 0; |
| if (gsCwq.uiCacheOpAddrType != PVRSRV_CACHE_OP_ADDR_TYPE_VIRTUAL) |
| { |
| eError = PMR_CpuPhysAddr(psPMR, |
| gsCwq.uiPageShift, |
| 1, |
| uiPgAlignedOffset, |
| psCpuPhyAddr, |
| pbValid); |
| PVR_LOGG_IF_ERROR(eError, "PMR_CpuPhysAddr", e0); |
| } |
| else |
| { |
| eError = PMR_IsOffsetValid(psPMR, |
| gsCwq.uiPageShift, |
| 1, |
| uiPgAlignedOffset, |
| pbValid); |
| PVR_LOGG_IF_ERROR(eError, "PMR_IsOffsetValid", e0); |
| } |
| } |
| |
| /* Skip invalid PMR pages (i.e. sparse) */ |
| if (pbValid[ui32PageIndex] == IMG_FALSE) |
| { |
| CACHEOP_PVR_ASSERT(bPMRIsSparse); |
| continue; |
| } |
| |
| if (pvAddress) |
| { |
| /* The caller has supplied a KM/UM CpuVA, so use it unconditionally */ |
| pbCpuVirtAddr = |
| (void *)(uintptr_t)((uintptr_t)pvAddress + (uintptr_t)(uiPgAlignedOffset-uiPgAlignedStartOffset)); |
| } |
| /* Skip CpuVA acquire if CacheOp can be maintained entirely using CpuPA */ |
| else if (gsCwq.uiCacheOpAddrType != PVRSRV_CACHE_OP_ADDR_TYPE_PHYSICAL) |
| { |
| if (bPMRIsSparse) |
| { |
| eError = |
| PMRAcquireSparseKernelMappingData(psPMR, |
| uiPgAlignedOffset, |
| gsCwq.uiPageSize, |
| (void **)&pbCpuVirtAddr, |
| (size_t*)&uiOutSize, |
| &hPrivOut); |
| PVR_LOGG_IF_ERROR(eError, "PMRAcquireSparseKernelMappingData", e0); |
| } |
| else |
| { |
| eError = |
| PMRAcquireKernelMappingData(psPMR, |
| uiPgAlignedOffset, |
| gsCwq.uiPageSize, |
| (void **)&pbCpuVirtAddr, |
| (size_t*)&uiOutSize, |
| &hPrivOut); |
| PVR_LOGG_IF_ERROR(eError, "PMRAcquireKernelMappingData", e0); |
| } |
| } |
| |
| /* Issue actual cache maintenance for PMR */ |
| CacheOpExecRangeBased(psDevNode, |
| uiCacheOp, |
| pbCpuVirtAddr, |
| (gsCwq.uiCacheOpAddrType != PVRSRV_CACHE_OP_ADDR_TYPE_VIRTUAL) ? |
| psCpuPhyAddr[ui32PageIndex] : psCpuPhyAddr[0], |
| uiPgAlignedOffset, |
| uiCLAlignedStartOffset, |
| uiCLAlignedEndOffset); |
| |
| if (! pvAddress) |
| { |
| /* The caller did not supply a KM/UM CpuVA, so release the kernel mapping acquired above */ |
| if (gsCwq.uiCacheOpAddrType != PVRSRV_CACHE_OP_ADDR_TYPE_PHYSICAL) |
| { |
| eError = PMRReleaseKernelMappingData(psPMR, hPrivOut); |
| PVR_LOG_IF_ERROR(eError, "PMRReleaseKernelMappingData"); |
| } |
| } |
| } |
| |
| e0: |
| if (psCpuPhyAddr != asCpuPhyAddr) |
| { |
| OSFreeMem(psCpuPhyAddr); |
| } |
| |
| if (pbValid != abValid) |
| { |
| OSFreeMem(pbValid); |
| } |
| |
| if (! bIsRequestValidated) |
| { |
| eError = PMRUnlockSysPhysAddresses(psPMR); |
| PVR_LOG_IF_ERROR(eError, "PMRUnlockSysPhysAddresses"); |
| } |
| |
| return eError; |
| } |
| |
| static PVRSRV_ERROR CacheOpQListExecGlobal(void) |
| { |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| IMG_UINT32 ui32NumOfEntries; |
| CACHEOP_WORK_ITEM *psCacheOpWorkItem; |
| #if defined(CACHEOP_DEBUG) |
| IMG_UINT64 uiTimeNow = 0; |
| IMG_UINT64 ui64DequeuedTime; |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = {0}; |
| #endif |
| CACHEOP_PVR_ASSERT(!gsCwq.bNoGlobalFlushImpl); |
| |
| /* Take a snapshot of the queued CacheOps before we issue the global cache flush |
| operation, so that we retire exactly the number of CacheOps that are covered |
| by the to-be-executed global CacheOp */ |
| ui32NumOfEntries = CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter); |
| if (OSAtomicRead(&gsCwq.hWriteCounter) < OSAtomicRead(&gsCwq.hReadCounter)) |
| { |
| /* This branch handles the write-counter having wrapped around in value space. |
| The logic works because the read-counter does not change value for the |
| duration of this function, so there is no risk of it also wrapping round |
| whilst the number of entries is being determined here; that is to say, |
| the consumer in this framework is single-threaded and this function |
| is that consumer thread */ |
| ui32NumOfEntries = CacheOpIdxSpan(&gsCwq.hReadCounter, &gsCwq.hWriteCounter); |
| |
| /* Two's complement arithmetic gives the number of entries */ |
| ui32NumOfEntries = CACHEOP_INDICES_MAX - ui32NumOfEntries; |
| } |
| if (! ui32NumOfEntries) |
| { |
| return PVRSRV_OK; |
| } |
| #if defined(CACHEOP_DEBUG) |
| CACHEOP_PVR_ASSERT(ui32NumOfEntries < CACHEOP_INDICES_MAX); |
| #endif |
| |
| /* Use the current/latest queue-tail work-item's GF/SeqNum to predicate GF */ |
| psCacheOpWorkItem = &gsCwq.asWorkItems[CacheOpIdxRead(&gsCwq.hWriteCounter)]; |
| CacheOpQItemReadCheck(psCacheOpWorkItem); |
| #if defined(CACHEOP_DEBUG) |
| /* The time waiting in the queue to be serviced */ |
| ui64DequeuedTime = OSClockns64(); |
| #endif |
| |
| /* Check if items between [hRead/hWrite]Counter can be discarded before issuing GF */ |
| if (gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] > psCacheOpWorkItem->ui32GFSeqNum) |
| { |
| /* The currently discarded CacheOp item updates gsCwq.hCompletedSeqNum */ |
| OSAtomicWrite(&gsCwq.hCompletedSeqNum, psCacheOpWorkItem->ui32OpSeqNum); |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32KMDiscards += ui32NumOfEntries; |
| #endif |
| } |
| else |
| { |
| eError = CacheOpGlobalFlush(); |
| PVR_LOGR_IF_ERROR(eError, "CacheOpGlobalFlush"); |
| #if defined(CACHEOP_DEBUG) |
| uiTimeNow = OSClockns64(); |
| sCacheOpWorkItem.bDeferred = IMG_TRUE; |
| sCacheOpWorkItem.ui64ExecuteTime = uiTimeNow; |
| sCacheOpWorkItem.psPMR = gsCwq.psInfoPagePMR; |
| sCacheOpWorkItem.pid = OSGetCurrentProcessID(); |
| sCacheOpWorkItem.uiCacheOp = PVRSRV_CACHE_OP_GLOBAL; |
| sCacheOpWorkItem.ui64DequeuedTime = ui64DequeuedTime; |
| sCacheOpWorkItem.ui64EnqueuedTime = psCacheOpWorkItem->ui64EnqueuedTime; |
| sCacheOpWorkItem.ui32OpSeqNum = gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| #endif |
| } |
| |
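| /* Retire every snapshotted queue entry: log it (debug builds), notify any timeline waiters and recycle the slot for reuse */ |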
| while (ui32NumOfEntries) |
| { |
| psCacheOpWorkItem = &gsCwq.asWorkItems[CacheOpIdxNext(&gsCwq.hReadCounter)]; |
| CacheOpQItemReadCheck(psCacheOpWorkItem); |
| |
| #if defined(CACHEOP_DEBUG) |
| if (psCacheOpWorkItem->uiCacheOp != PVRSRV_CACHE_OP_GLOBAL) |
| { |
| psCacheOpWorkItem->bRBF = IMG_FALSE; |
| if (! uiTimeNow) |
| { |
| /* Measure deferred queueing overhead only */ |
| uiTimeNow = OSClockns64(); |
| psCacheOpWorkItem->ui64ExecuteTime = uiTimeNow; |
| } |
| else |
| { |
| psCacheOpWorkItem->ui64ExecuteTime = uiTimeNow; |
| } |
| psCacheOpWorkItem->ui64DequeuedTime = ui64DequeuedTime; |
| CacheOpStatsExecLogWrite(psCacheOpWorkItem); |
| } |
| /* Something's gone horribly wrong if these 2 counters are identical at this point */ |
| CACHEOP_PVR_ASSERT(OSAtomicRead(&gsCwq.hWriteCounter) != OSAtomicRead(&gsCwq.hReadCounter)); |
| #endif |
| |
| /* If CacheOp is timeline(d), notify timeline waiters */ |
| eError = CacheOpTimelineExec(psCacheOpWorkItem); |
| PVR_LOG_IF_ERROR(eError, "CacheOpTimelineExec"); |
| |
| /* Mark index as ready for recycling for next CacheOp */ |
| CacheOpQItemRecycle(psCacheOpWorkItem); |
| (void) CacheOpIdxIncrement(&gsCwq.hReadCounter); |
| ui32NumOfEntries = ui32NumOfEntries - 1; |
| } |
| |
| #if defined(CACHEOP_DEBUG) |
| if (uiTimeNow) |
| { |
| /* Only log GF that was actually executed */ |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| } |
| #endif |
| |
| return eError; |
| } |
| |
| static PVRSRV_ERROR CacheOpQListExecRangeBased(void) |
| { |
| IMG_UINT32 ui32NumOfEntries; |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| IMG_UINT32 ui32WriteCounter = ~0; |
| IMG_BOOL bUsedGlobalFlush = IMG_FALSE; |
| CACHEOP_WORK_ITEM *psCacheOpWorkItem = NULL; |
| #if defined(CACHEOP_DEBUG) |
| IMG_UINT64 uiTimeNow = 0; |
| #endif |
| |
| /* Take a snapshot of the current count of deferred entries at this junction */ |
| ui32NumOfEntries = CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter); |
| if (! ui32NumOfEntries) |
| { |
| return PVRSRV_OK; |
| } |
| #if defined(CACHEOP_DEBUG) |
| CACHEOP_PVR_ASSERT(ui32NumOfEntries < CACHEOP_INDICES_MAX); |
| #endif |
| |
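| /* Drain the snapshotted entries one by one, executing each deferred work item via CacheOpPMRExec() */ |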
| while (ui32NumOfEntries) |
| { |
| if (! OSAtomicRead(&gsCwq.hReadCounter)) |
| { |
| /* Normally the read-counter trails the write-counter until the write-counter |
| wraps round to zero. In that case we recalculate, as the read-counter is |
| itself about to wrap around at this point */ |
| ui32NumOfEntries = CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter); |
| } |
| #if defined(CACHEOP_DEBUG) |
| /* Something's gone horribly wrong if these 2 counters are identical at this point */ |
| CACHEOP_PVR_ASSERT(OSAtomicRead(&gsCwq.hWriteCounter) != OSAtomicRead(&gsCwq.hReadCounter)); |
| #endif |
| |
| /* Select the next pending deferred work-item for RBF cache maintenance */ |
| psCacheOpWorkItem = &gsCwq.asWorkItems[CacheOpIdxNext(&gsCwq.hReadCounter)]; |
| CacheOpQItemReadCheck(psCacheOpWorkItem); |
| #if defined(CACHEOP_DEBUG) |
| /* The time waiting in the queue to be serviced */ |
| psCacheOpWorkItem->ui64DequeuedTime = OSClockns64(); |
| #endif |
| |
| /* The following CacheOpPMRExec() could trigger a GF, so (re)read this counter |
| beforehand so that we know exactly which pending CacheOp(s) will benefit from |
| the soon-to-be-executed GF */ |
| ui32WriteCounter = CacheOpConfigSupports(CACHEOP_CONFIG_KGF) ? |
| OSAtomicRead(&gsCwq.hWriteCounter) : ui32WriteCounter; |
| |
| eError = CacheOpPMRExec(psCacheOpWorkItem->psPMR, |
| NULL, /* No UM virtual address */ |
| psCacheOpWorkItem->uiOffset, |
| psCacheOpWorkItem->uiSize, |
| psCacheOpWorkItem->uiCacheOp, |
| psCacheOpWorkItem->ui32GFSeqNum, |
| IMG_TRUE, /* PMR is pre-validated */ |
| &bUsedGlobalFlush); |
| if (eError != PVRSRV_OK) |
| { |
| #if defined(CACHEOP_DEBUG) |
| #define PID_FMTSPEC " PID:%u" |
| #define CACHE_OP_WORK_PID psCacheOpWorkItem->pid |
| #else |
| #define PID_FMTSPEC "%s" |
| #define CACHE_OP_WORK_PID "" |
| #endif |
| |
| PVR_LOG(("Deferred CacheOpPMRExec failed:" |
| PID_FMTSPEC |
| " PMR:%p" |
| " Offset:%" IMG_UINT64_FMTSPECX |
| " Size:%" IMG_UINT64_FMTSPECX |
| " CacheOp:%d," |
| " error: %d", |
| CACHE_OP_WORK_PID, |
| psCacheOpWorkItem->psPMR, |
| psCacheOpWorkItem->uiOffset, |
| psCacheOpWorkItem->uiSize, |
| psCacheOpWorkItem->uiCacheOp, |
| eError)); |
| |
| #undef PID_FMTSPEC |
| #undef CACHE_OP_WORK_PID |
| } |
| else if (bUsedGlobalFlush) |
| { |
| #if defined(CACHEOP_DEBUG) |
| psCacheOpWorkItem->ui32OpSeqNum = gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| #endif |
| break; |
| } |
| |
| #if defined(CACHEOP_DEBUG) |
| if (psCacheOpWorkItem->uiCacheOp != PVRSRV_CACHE_OP_GLOBAL) |
| { |
| psCacheOpWorkItem->bRBF = IMG_TRUE; |
| psCacheOpWorkItem->ui64ExecuteTime = OSClockns64(); |
| CacheOpStatsExecLogWrite(psCacheOpWorkItem); |
| } |
| else |
| { |
| CACHEOP_PVR_ASSERT(!gsCwq.bNoGlobalFlushImpl); |
| } |
| #endif |
| |
| /* The currently executed CacheOp item updates gsCwq.hCompletedSeqNum. |
| NOTE: This CacheOp item might be a discard item, if so its seqNum |
| still updates the gsCwq.hCompletedSeqNum */ |
| OSAtomicWrite(&gsCwq.hCompletedSeqNum, psCacheOpWorkItem->ui32OpSeqNum); |
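| /* Shrink the outstanding deferred byte count now that this work item has been actioned */ |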
| OSAtomicSubtract(&gsCwq.hDeferredSize, psCacheOpWorkItem->uiSize); |
| |
| /* If CacheOp is timeline(d), notify timeline waiters */ |
| eError = CacheOpTimelineExec(psCacheOpWorkItem); |
| PVR_LOG_IF_ERROR(eError, "CacheOpTimelineExec"); |
| |
| /* Indicate that this CCB work-item slot is now free for (re)use */ |
| CacheOpQItemRecycle(psCacheOpWorkItem); |
| (void) CacheOpIdxIncrement(&gsCwq.hReadCounter); |
| ui32NumOfEntries = ui32NumOfEntries - 1; |
| } |
| |
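| /* A GF was triggered mid-drain: every entry enqueued up to the snapshotted write counter is already covered by it, so retire the remainder without further maintenance */ |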
| if (bUsedGlobalFlush) |
| { |
| #if defined(CACHEOP_DEBUG) |
| uiTimeNow = OSClockns64(); |
| CACHEOP_PVR_ASSERT(OSAtomicRead(&gsCwq.hWriteCounter) != OSAtomicRead(&gsCwq.hReadCounter)); |
| #endif |
| |
| /* Snapshot of queued CacheOps before the global cache flush was issued */ |
| ui32NumOfEntries = ui32WriteCounter - OSAtomicRead(&gsCwq.hReadCounter); |
| if (ui32WriteCounter < OSAtomicRead(&gsCwq.hReadCounter)) |
| { |
| /* Branch handles when the write-counter has wrapped-around in value space */ |
| ui32NumOfEntries = OSAtomicRead(&gsCwq.hReadCounter) - ui32WriteCounter; |
| ui32NumOfEntries = CACHEOP_INDICES_MAX - ui32NumOfEntries; |
| } |
| |
| while (ui32NumOfEntries) |
| { |
| CacheOpQItemReadCheck(psCacheOpWorkItem); |
| |
| #if defined(CACHEOP_DEBUG) |
| psCacheOpWorkItem->bRBF = IMG_FALSE; |
| psCacheOpWorkItem->ui64ExecuteTime = uiTimeNow; |
| if (psCacheOpWorkItem->uiCacheOp == PVRSRV_CACHE_OP_GLOBAL) |
| { |
| CACHEOP_PVR_ASSERT(!gsCwq.bNoGlobalFlushImpl); |
| psCacheOpWorkItem->pid = OSGetCurrentProcessID(); |
| } |
| CacheOpStatsExecLogWrite(psCacheOpWorkItem); |
| #endif |
| |
| eError = CacheOpTimelineExec(psCacheOpWorkItem); |
| PVR_LOG_IF_ERROR(eError, "CacheOpTimelineExec"); |
| |
| /* Mark index as ready for recycling for next CacheOp */ |
| CacheOpQItemRecycle(psCacheOpWorkItem); |
| (void) CacheOpIdxIncrement(&gsCwq.hReadCounter); |
| ui32NumOfEntries = ui32NumOfEntries - 1; |
| psCacheOpWorkItem = &gsCwq.asWorkItems[CacheOpIdxNext(&gsCwq.hReadCounter)]; |
| } |
| } |
| |
| return eError; |
| } |
| |
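| /* Dispatch the deferred queue: prefer a single KGF when it is available and either KRBF is absent or the accumulated deferred size exceeds the KM GF threshold, otherwise drain the queue with range-based flushes */ |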
| static INLINE PVRSRV_ERROR CacheOpQListExec(void) |
| { |
| PVRSRV_ERROR eError; |
| |
| if (CacheOpConfigSupports(CACHEOP_CONFIG_KGF) && |
| (!CacheOpConfigSupports(CACHEOP_CONFIG_KRBF) |
| || OSAtomicRead(&gsCwq.hDeferredSize) > gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD])) |
| { |
| eError = CacheOpQListExecGlobal(); |
| PVR_LOG_IF_ERROR(eError, "CacheOpQListExecGlobal"); |
| } |
| else |
| { |
| eError = CacheOpQListExecRangeBased(); |
| PVR_LOG_IF_ERROR(eError, "CacheOpQListExecRangeBased"); |
| } |
| |
| /* Signal any waiting threads blocked on CacheOp fence checks; the completed |
| sequence number has been updated to that of the last actioned work item */ |
| eError = OSEventObjectSignal(gsCwq.hClientWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectSignal"); |
| |
| return eError; |
| } |
| |
| static void CacheOpThread(void *pvData) |
| { |
| PVRSRV_DATA *psPVRSRVData = pvData; |
| IMG_HANDLE hOSEvent; |
| PVRSRV_ERROR eError; |
| |
| /* Open the CacheOp thread event object, logging an error if the open fails */ |
| eError = OSEventObjectOpen(gsCwq.hThreadWakeUpEvtObj, &hOSEvent); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectOpen"); |
| |
| /* While driver is in good state & loaded, perform pending cache maintenance */ |
| while ((psPVRSRVData->eServicesState == PVRSRV_SERVICES_STATE_OK) && gsCwq.bInit) |
| { |
| /* Sleep-wait here until signalled for newly queued CacheOp work items; |
| when woken up, drain the deferred queue completely before the next event-wait */ |
| (void) OSEventObjectWaitKernel(hOSEvent, CACHEOP_THREAD_WAIT_TIMEOUT); |
| while (CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter)) |
| { |
| eError = CacheOpQListExec(); |
| PVR_LOG_IF_ERROR(eError, "CacheOpQListExec"); |
| } |
| } |
| |
| eError = CacheOpQListExec(); |
| PVR_LOG_IF_ERROR(eError, "CacheOpQListExec"); |
| |
| eError = OSEventObjectClose(hOSEvent); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectClose"); |
| } |
| |
| static PVRSRV_ERROR CacheOpBatchExecTimeline(PVRSRV_DEVICE_NODE *psDevNode, |
| PVRSRV_TIMELINE iTimeline, |
| IMG_BOOL bUsedGlobalFlush, |
| IMG_UINT32 ui32CurrentFenceSeqNum, |
| IMG_UINT32 *pui32NextFenceSeqNum) |
| { |
| PVRSRV_ERROR eError; |
| IMG_UINT32 ui32NextIdx; |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = { }; |
| CACHEOP_WORK_ITEM *psCacheOpWorkItem = NULL; |
| |
| eError = CacheOpTimelineBind(psDevNode, &sCacheOpWorkItem, iTimeline); |
| PVR_LOGR_IF_ERROR(eError, "CacheOpTimelineBind"); |
| |
| OSLockAcquire(gsCwq.hDeferredLock); |
| |
| /* |
| Check that there is deferred queueing space available and that nothing is |
| currently queued. The second check is required because Android, where |
| timelines are used, expects timelines to be signalled within a 1000ms |
| deadline and complains otherwise. Since we cannot be sure how long any |
| CacheOp already in the queue would take, we must not send this timeline |
| down the queue behind it. |
| */ |
| ui32NextIdx = CacheOpIdxNext(&gsCwq.hWriteCounter); |
| if (!CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter) && |
| CacheOpIdxRead(&gsCwq.hReadCounter) != ui32NextIdx) |
| { |
| psCacheOpWorkItem = &gsCwq.asWorkItems[ui32NextIdx]; |
| CacheOpQItemWriteCheck(psCacheOpWorkItem); |
| |
| psCacheOpWorkItem->sSWTimelineObj = sCacheOpWorkItem.sSWTimelineObj; |
| psCacheOpWorkItem->iTimeline = sCacheOpWorkItem.iTimeline; |
| psCacheOpWorkItem->ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| psCacheOpWorkItem->uiCacheOp = PVRSRV_CACHE_OP_TIMELINE; |
| psCacheOpWorkItem->uiOffset = (IMG_DEVMEM_OFFSET_T)0; |
| psCacheOpWorkItem->uiSize = (IMG_DEVMEM_SIZE_T)0; |
| psCacheOpWorkItem->ui32GFSeqNum = 0; |
| psCacheOpWorkItem->psDevNode = psDevNode; |
| /* Defer timeline using information page PMR */ |
| psCacheOpWorkItem->psPMR = gsCwq.psInfoPagePMR; |
| eError = PMRLockSysPhysAddresses(psCacheOpWorkItem->psPMR); |
| PVR_LOGG_IF_ERROR(eError, "PMRLockSysPhysAddresses", e0); |
| #if defined(CACHEOP_DEBUG) |
| psCacheOpWorkItem->pid = OSGetCurrentClientProcessIDKM(); |
| psCacheOpWorkItem->ui64EnqueuedTime = OSClockns64(); |
| gsCwq.ui32ServerASync += 1; |
| gsCwq.ui32ServerDTL += 1; |
| #endif |
| |
| /* Mark index ready for cache maintenance */ |
| (void) CacheOpIdxIncrement(&gsCwq.hWriteCounter); |
| |
| OSLockRelease(gsCwq.hDeferredLock); |
| |
| /* Signal the CacheOp thread to ensure this deferred timeline gets processed */ |
| eError = OSEventObjectSignal(gsCwq.hThreadWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectSignal"); |
| } |
| else |
| { |
| IMG_BOOL bExecTimeline = IMG_TRUE; |
| IMG_UINT32 ui32CompletedOpSeqNum = OSAtomicRead(&gsCwq.hCompletedSeqNum); |
| |
| OSLockRelease(gsCwq.hDeferredLock); |
| |
| /* |
| This pathway requires careful handling because the client CacheOp(s) predicated on this |
| timeline might have been broken up (i.e. batched) into several server requests by the client: |
| 1 - A CacheOp from an earlier batch is still in-flight. Even though we might have executed |
| all the CacheOps in this batch synchronously, we cannot be sure that an in-flight CacheOp |
| pending for this client is not predicated on this timeline, so for safety we synchronise |
| here by fencing until all in-flight CacheOps have completed. NOTE: on Android this might |
| cause issues due to timeline notification deadlines, so we do not fence (i.e. cannot sleep |
| or wait) here to synchronise; instead we nudge the services client to retry the request if |
| there is no GF support. |
| 2 - There is no in-flight CacheOp for this client, in which case we just continue |
| processing as normal. |
| */ |
| if (!bUsedGlobalFlush && !CacheOpFenceCheck(ui32CompletedOpSeqNum, ui32CurrentFenceSeqNum)) |
| { |
| #if defined(ANDROID) |
| bExecTimeline = IMG_TRUE; |
| if (CacheOpGlobalFlush() != PVRSRV_OK) |
| { |
| bExecTimeline = IMG_FALSE; |
| eError = PVRSRV_ERROR_RETRY; |
| } |
| #else |
| eError = CacheOpFence ((RGXFWIF_DM)0, ui32CurrentFenceSeqNum); |
| PVR_LOG_IF_ERROR(eError, "CacheOpFence"); |
| |
| /* CacheOpFence() might have triggered a GF so we take advantage of it */ |
| if (gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] > ui32CurrentFenceSeqNum) |
| { |
| *pui32NextFenceSeqNum = 0; |
| } |
| #endif |
| } |
| |
| if (bExecTimeline) |
| { |
| /* CacheOp fence requirement met, signal timeline */ |
| eError = CacheOpTimelineExec(&sCacheOpWorkItem); |
| PVR_LOG_IF_ERROR(eError, "CacheOpTimelineExec"); |
| } |
| } |
| |
| return eError; |
| e0: |
| if (psCacheOpWorkItem) |
| { |
| /* Need to ensure we leave this CacheOp QItem in the proper recycled state */ |
| CacheOpQItemRecycle(psCacheOpWorkItem); |
| OSLockRelease(gsCwq.hDeferredLock); |
| } |
| |
| return eError; |
| } |
| |
| static PVRSRV_ERROR CacheOpBatchExecRangeBased(PVRSRV_DEVICE_NODE *psDevNode, |
| PMR **ppsPMR, |
| IMG_CPU_VIRTADDR *pvAddress, |
| IMG_DEVMEM_OFFSET_T *puiOffset, |
| IMG_DEVMEM_SIZE_T *puiSize, |
| PVRSRV_CACHE_OP *puiCacheOp, |
| IMG_UINT32 ui32NumCacheOps, |
| PVRSRV_TIMELINE uiTimeline, |
| IMG_UINT32 ui32GlobalFlushSeqNum, |
| IMG_UINT32 uiCurrentFenceSeqNum, |
| IMG_UINT32 *pui32NextFenceSeqNum) |
| { |
| IMG_UINT32 ui32Idx; |
| IMG_UINT32 ui32NextIdx; |
| IMG_BOOL bBatchHasTimeline; |
| IMG_BOOL bCacheOpConfigKDF; |
| IMG_BOOL bCacheOpConfigKRBF; |
| IMG_DEVMEM_SIZE_T uiLogicalSize; |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| IMG_BOOL bUseGlobalFlush = IMG_FALSE; |
| CACHEOP_WORK_ITEM *psCacheOpWorkItem = NULL; |
| #if defined(CACHEOP_DEBUG) |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = {0}; |
| IMG_UINT32 ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| sCacheOpWorkItem.pid = OSGetCurrentClientProcessIDKM(); |
| #endif |
| |
| /* Check if batch has an associated timeline update */ |
| bBatchHasTimeline = puiCacheOp[ui32NumCacheOps-1] & PVRSRV_CACHE_OP_TIMELINE; |
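| /* Only the last batch entry carries the GLOBAL/TIMELINE modifier bits; mask them off so the per-entry processing below sees just the base CacheOp value */ |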
| puiCacheOp[ui32NumCacheOps-1] &= ~(PVRSRV_CACHE_OP_GLOBAL | PVRSRV_CACHE_OP_TIMELINE); |
| |
| /* Check if config. supports kernel deferring of cacheops */ |
| bCacheOpConfigKDF = CacheOpConfigSupports(CACHEOP_CONFIG_KDF); |
| bCacheOpConfigKRBF = CacheOpConfigSupports(CACHEOP_CONFIG_KRBF); |
| |
| /* |
| The client expects the next fence seqNum to be zero unless the server has deferred |
| at least one CacheOp from the submitted queue, in which case the server informs |
| the client of the seqNum of the last CacheOp deferred in this batch. |
| */ |
| for (*pui32NextFenceSeqNum = 0, ui32Idx = 0; ui32Idx < ui32NumCacheOps; ui32Idx++) |
| { |
| if (! puiSize[ui32Idx]) |
| { |
| /* Fail UM request, don't silently ignore */ |
| eError = PVRSRV_ERROR_INVALID_PARAMS; |
| goto e0; |
| } |
| else if (bCacheOpConfigKDF) |
| { |
| /* Check if there is deferred queueing space available */ |
| ui32NextIdx = CacheOpIdxNext(&gsCwq.hWriteCounter); |
| if (ui32NextIdx != CacheOpIdxRead(&gsCwq.hReadCounter)) |
| { |
| psCacheOpWorkItem = &gsCwq.asWorkItems[ui32NextIdx]; |
| } |
| } |
| |
| /* |
| Normally we would like to defer client CacheOp(s), but we may not always be in a |
| position to do so, or it may not be necessary, for the following reasons: |
| 0 - There is currently no queueing space left to enqueue this CacheOp; this might |
| imply the system is queueing more requests than the CacheOp thread can consume |
| in time. |
| 1 - Batch has a timeline; action this now due to Android timeline signalling deadlines. |
| 2 - Configuration does not support deferring of cache maintenance operations, so we |
| execute the batch synchronously/immediately. |
| 3 - CacheOp has an INVALIDATE; as this is used to transfer device memory buffer |
| ownership back to the processor, we cannot defer it so we action it immediately. |
| 4 - CacheOp size is too small (a single OS page) to warrant the overhead of deferment; |
| this is not considered if KRBF is not present, as that implies defer-all. |
| 5 - CacheOp size is OK for deferment, but a client virtual address is supplied, so we |
| might as well take advantage of said VA and flush immediately in UM context. |
| 6 - Prevent a DoS attack where a malicious client queues something very large, say 1GiB, |
| and the processor cache ISA does not have a global flush implementation. Here |
| we upper-bound this threshold to PVR_DIRTY_BYTES_FLUSH_THRESHOLD. |
| 7 - Ensure QoS (load balancing) by not over-loading the queue with too many requests; |
| here the (pseudo) alternate queue is the user context, so we execute directly |
| on it if the processor cache ISA does not have a global flush implementation. |
| */ |
| if (!psCacheOpWorkItem || |
| bBatchHasTimeline || |
| !bCacheOpConfigKDF || |
| puiCacheOp[ui32Idx] & PVRSRV_CACHE_OP_INVALIDATE || |
| (bCacheOpConfigKRBF && puiSize[ui32Idx] <= (IMG_DEVMEM_SIZE_T)gsCwq.uiPageSize) || |
| (pvAddress[ui32Idx] && puiSize[ui32Idx] < (IMG_DEVMEM_SIZE_T)gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD]) || |
| (gsCwq.bNoGlobalFlushImpl && puiSize[ui32Idx] >= (IMG_DEVMEM_SIZE_T)(gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD] << 2)) || |
| (gsCwq.bNoGlobalFlushImpl && OSAtomicRead(&gsCwq.hDeferredSize) >= gsCwq.pui32InfoPage[CACHEOP_INFO_KMDFTHRESHLD] << CACHEOP_INDICES_LOG2_SIZE)) |
| { |
| /* When the CacheOp thread is not keeping up, fall back to trashing the whole d-cache (GF) */ |
| bUseGlobalFlush = !psCacheOpWorkItem && bCacheOpConfigKDF ? IMG_TRUE : IMG_FALSE; |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.ui64EnqueuedTime = OSClockns64(); |
| gsCwq.ui32ServerSync += 1; |
| #endif |
| psCacheOpWorkItem = NULL; |
| |
| eError = CacheOpPMRExec(ppsPMR[ui32Idx], |
| pvAddress[ui32Idx], |
| puiOffset[ui32Idx], |
| puiSize[ui32Idx], |
| puiCacheOp[ui32Idx], |
| ui32GlobalFlushSeqNum, |
| IMG_FALSE, |
| &bUseGlobalFlush); |
| PVR_LOGG_IF_ERROR(eError, "CacheOpPMRExec", e0); |
| |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.ui64ExecuteTime = OSClockns64(); |
| sCacheOpWorkItem.bRBF = !bUseGlobalFlush; |
| sCacheOpWorkItem.ui32OpSeqNum = bUseGlobalFlush ? |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] : ui32OpSeqNum; |
| sCacheOpWorkItem.psPMR = ppsPMR[ui32Idx]; |
| sCacheOpWorkItem.uiSize = puiSize[ui32Idx]; |
| sCacheOpWorkItem.uiOffset = puiOffset[ui32Idx]; |
| sCacheOpWorkItem.uiCacheOp = puiCacheOp[ui32Idx]; |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| #endif |
| |
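| /* A GF covers every remaining entry in this batch, so stop processing here; the rest are accounted for as discards below */ |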
| if (bUseGlobalFlush) break; |
| continue; |
| } |
| |
| /* Need to validate request parameters here before enqueing */ |
| eError = PMR_LogicalSize(ppsPMR[ui32Idx], &uiLogicalSize); |
| PVR_LOGG_IF_ERROR(eError, "PMR_LogicalSize", e0); |
| eError = PVRSRV_ERROR_DEVICEMEM_OUT_OF_RANGE; |
| PVR_LOGG_IF_FALSE(((puiOffset[ui32Idx]+puiSize[ui32Idx]) <= uiLogicalSize), CACHEOP_DEVMEM_OOR_ERROR_STRING, e0); |
| eError = PVRSRV_OK; |
| |
| /* For safety, take reference here in user context */ |
| eError = PMRLockSysPhysAddresses(ppsPMR[ui32Idx]); |
| PVR_LOGG_IF_ERROR(eError, "PMRLockSysPhysAddresses", e0); |
| |
| OSLockAcquire(gsCwq.hDeferredLock); |
| |
| /* Select next item off the queue to defer with */ |
| ui32NextIdx = CacheOpIdxNext(&gsCwq.hWriteCounter); |
| if (ui32NextIdx != CacheOpIdxRead(&gsCwq.hReadCounter)) |
| { |
| psCacheOpWorkItem = &gsCwq.asWorkItems[ui32NextIdx]; |
| CacheOpQItemWriteCheck(psCacheOpWorkItem); |
| } |
| else |
| { |
| /* Retry, disable KDF for this batch */ |
| OSLockRelease(gsCwq.hDeferredLock); |
| bCacheOpConfigKDF = IMG_FALSE; |
| psCacheOpWorkItem = NULL; |
| ui32Idx = ui32Idx - 1; |
| continue; |
| } |
| |
| /* The timeline needs to be looked-up (i.e. bound) in the user context |
| before deferring into the CacheOp thread kernel context */ |
| eError = CacheOpTimelineBind(psDevNode, psCacheOpWorkItem, PVRSRV_NO_TIMELINE); |
| PVR_LOGG_IF_ERROR(eError, "CacheOpTimelineBind", e1); |
| |
| /* Prepare & enqueue next deferred work item for CacheOp thread */ |
| psCacheOpWorkItem->ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| *pui32NextFenceSeqNum = psCacheOpWorkItem->ui32OpSeqNum; |
| psCacheOpWorkItem->ui32GFSeqNum = ui32GlobalFlushSeqNum; |
| psCacheOpWorkItem->uiCacheOp = puiCacheOp[ui32Idx]; |
| psCacheOpWorkItem->uiOffset = puiOffset[ui32Idx]; |
| psCacheOpWorkItem->uiSize = puiSize[ui32Idx]; |
| psCacheOpWorkItem->psPMR = ppsPMR[ui32Idx]; |
| psCacheOpWorkItem->psDevNode = psDevNode; |
| #if defined(CACHEOP_DEBUG) |
| psCacheOpWorkItem->ui64EnqueuedTime = OSClockns64(); |
| psCacheOpWorkItem->pid = sCacheOpWorkItem.pid; |
| psCacheOpWorkItem->bDeferred = IMG_TRUE; |
| psCacheOpWorkItem->bKMReq = IMG_FALSE; |
| psCacheOpWorkItem->bUMF = IMG_FALSE; |
| gsCwq.ui32ServerASync += 1; |
| #endif |
| |
| /* Increment deferred size & mark index ready for cache maintenance */ |
| OSAtomicAdd(&gsCwq.hDeferredSize, (IMG_UINT32)puiSize[ui32Idx]); |
| (void) CacheOpIdxIncrement(&gsCwq.hWriteCounter); |
| |
| OSLockRelease(gsCwq.hDeferredLock); |
| psCacheOpWorkItem = NULL; |
| } |
| |
| /* Signal the CacheOp thread to ensure these items get processed */ |
| eError = OSEventObjectSignal(gsCwq.hThreadWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectSignal"); |
| |
| if (bUseGlobalFlush) |
| { |
| #if defined(CACHEOP_DEBUG) |
| /* The GF was already logged in the loop above, so the rest (if any) are discards */ |
| sCacheOpWorkItem.ui64ExecuteTime = sCacheOpWorkItem.ui64EnqueuedTime; |
| sCacheOpWorkItem.pid = OSGetCurrentClientProcessIDKM(); |
| while (++ui32Idx < ui32NumCacheOps) |
| { |
| sCacheOpWorkItem.psPMR = ppsPMR[ui32Idx]; |
| sCacheOpWorkItem.uiSize = puiSize[ui32Idx]; |
| sCacheOpWorkItem.uiOffset = puiOffset[ui32Idx]; |
| sCacheOpWorkItem.uiCacheOp = puiCacheOp[ui32Idx]; |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| gsCwq.ui32KMDiscards += 1; |
| } |
| #endif |
| |
| /* No next UM fence seqNum */ |
| *pui32NextFenceSeqNum = 0; |
| } |
| |
| e1: |
| if (psCacheOpWorkItem) |
| { |
| /* Need to ensure we leave this CacheOp QItem in the proper recycled state */ |
| CacheOpQItemRecycle(psCacheOpWorkItem); |
| OSLockRelease(gsCwq.hDeferredLock); |
| } |
| e0: |
| if (bBatchHasTimeline) |
| { |
| PVRSRV_ERROR eError2; |
| eError2 = CacheOpBatchExecTimeline(psDevNode, uiTimeline, bUseGlobalFlush, |
| uiCurrentFenceSeqNum, pui32NextFenceSeqNum); |
| eError = (eError2 == PVRSRV_ERROR_RETRY) ? eError2 : eError; |
| } |
| |
| return eError; |
| } |
| |
| static PVRSRV_ERROR CacheOpBatchExecGlobal(PVRSRV_DEVICE_NODE *psDevNode, |
| PMR **ppsPMR, |
| IMG_CPU_VIRTADDR *pvAddress, |
| IMG_DEVMEM_OFFSET_T *puiOffset, |
| IMG_DEVMEM_SIZE_T *puiSize, |
| PVRSRV_CACHE_OP *puiCacheOp, |
| IMG_UINT32 ui32NumCacheOps, |
| PVRSRV_TIMELINE uiTimeline, |
| IMG_UINT32 ui32GlobalFlushSeqNum, |
| IMG_UINT32 uiCurrentFenceSeqNum, |
| IMG_UINT32 *pui32NextFenceSeqNum) |
| { |
| IMG_UINT32 ui32Idx; |
| IMG_BOOL bBatchHasTimeline; |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| IMG_BOOL bUseGlobalFlush = IMG_FALSE; |
| CACHEOP_WORK_ITEM *psCacheOpWorkItem = NULL; |
| #if defined(CACHEOP_DEBUG) |
| IMG_DEVMEM_SIZE_T uiTotalSize = 0; |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = {0}; |
| sCacheOpWorkItem.pid = OSGetCurrentClientProcessIDKM(); |
| #endif |
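| /* Without a GF implementation this entry point cannot be serviced, so reject the request up-front */ |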
| #if !defined(CACHEFLUSH_ISA_SUPPORTS_GLOBAL_FLUSH) |
| PVR_LOGR_IF_ERROR(PVRSRV_ERROR_NOT_SUPPORTED, CACHEOP_NO_GFLUSH_ERROR_STRING); |
| #endif |
| PVR_UNREFERENCED_PARAMETER(pvAddress); |
| |
| /* Check if batch has an associated timeline update request */ |
| bBatchHasTimeline = puiCacheOp[ui32NumCacheOps-1] & PVRSRV_CACHE_OP_TIMELINE; |
| puiCacheOp[ui32NumCacheOps-1] &= ~(PVRSRV_CACHE_OP_GLOBAL | PVRSRV_CACHE_OP_TIMELINE); |
| |
| /* Skip operation if an else-when GF has occurred in the interim time */ |
| if (gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] > ui32GlobalFlushSeqNum) |
| { |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.ui32OpSeqNum = ui32GlobalFlushSeqNum; |
| #endif |
| bUseGlobalFlush = IMG_TRUE; |
| *pui32NextFenceSeqNum = 0; |
| goto exec_timeline; |
| } |
| |
| /* Scan the client batch for zero-sized entries and for INVALIDATE CacheOps, which cannot be deferred */ |
| for (*pui32NextFenceSeqNum = 0, ui32Idx = 0; ui32Idx < ui32NumCacheOps; ui32Idx++) |
| { |
| #if defined(CACHEOP_DEBUG) |
| IMG_DEVMEM_SIZE_T uiLogicalSize; |
| uiTotalSize += puiSize[ui32Idx]; |
| /* There is no need to validate request parameters as we are about |
| to issue a GF, but skipping this might make issues reproducible |
| in one config but not the other, so validate under debug */ |
| eError = PMR_LogicalSize(ppsPMR[ui32Idx], &uiLogicalSize); |
| PVR_LOGG_IF_ERROR(eError, "PMR_LogicalSize", e0); |
| eError = PVRSRV_ERROR_DEVICEMEM_OUT_OF_RANGE; |
| PVR_LOGG_IF_FALSE(((puiOffset[ui32Idx]+puiSize[ui32Idx]) <= uiLogicalSize), CACHEOP_DEVMEM_OOR_ERROR_STRING, e0); |
| eError = PVRSRV_OK; |
| #endif |
| if (! puiSize[ui32Idx]) |
| { |
| /* Fail UM request, don't silently ignore */ |
| eError = PVRSRV_ERROR_INVALID_PARAMS; |
| goto e0; |
| } |
| else if (puiCacheOp[ui32Idx] & PVRSRV_CACHE_OP_INVALIDATE) |
| { |
| /* Invalidates cannot be deferred */ |
| bUseGlobalFlush = IMG_TRUE; |
| } |
| } |
| |
| OSLockAcquire(gsCwq.hDeferredLock); |
| |
| /* |
| Normally we would like to defer client CacheOp(s), but we may not always be in a |
| position to do so, for the following reasons: |
| 0 - Batch has an INVALIDATE; as this is used to transfer device memory buffer |
| ownership back to the processor, we cannot defer it so we action it immediately. |
| 1 - Configuration does not support deferring of cache maintenance operations, so |
| we execute synchronously/immediately. |
| 2 - There is currently no queueing space left to enqueue this CacheOp; this might |
| imply the system is queueing more requests than the CacheOp thread can consume |
| in time. |
| 3 - Batch has a timeline and there is currently something queued; we cannot defer |
| because the currently queued operation(s) might take quite a while to action, |
| which might cause a timeline deadline timeout. |
| */ |
| if (bUseGlobalFlush || |
| !CacheOpConfigSupports(CACHEOP_CONFIG_KDF) || |
| CacheOpIdxNext(&gsCwq.hWriteCounter) == CacheOpIdxRead(&gsCwq.hReadCounter) || |
| (bBatchHasTimeline && CacheOpIdxSpan(&gsCwq.hWriteCounter, &gsCwq.hReadCounter))) |
| |
| { |
| OSLockRelease(gsCwq.hDeferredLock); |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| sCacheOpWorkItem.ui64EnqueuedTime = OSClockns64(); |
| #endif |
| eError = CacheOpGlobalFlush(); |
| PVR_LOGG_IF_ERROR(eError, "CacheOpGlobalFlush", e0); |
| bUseGlobalFlush = IMG_TRUE; |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.ui64ExecuteTime = OSClockns64(); |
| gsCwq.ui32ServerSync += 1; |
| #endif |
| goto exec_timeline; |
| } |
| |
| /* Select next item off queue to defer this GF and possibly timeline with */ |
| psCacheOpWorkItem = &gsCwq.asWorkItems[CacheOpIdxNext(&gsCwq.hWriteCounter)]; |
| CacheOpQItemWriteCheck(psCacheOpWorkItem); |
| |
| /* Defer the GF using information page PMR */ |
| psCacheOpWorkItem->psPMR = gsCwq.psInfoPagePMR; |
| eError = PMRLockSysPhysAddresses(psCacheOpWorkItem->psPMR); |
| PVR_LOGG_IF_ERROR(eError, "PMRLockSysPhysAddresses", e0); |
| |
| /* Timeline object has to be looked-up here in user context */ |
| eError = CacheOpTimelineBind(psDevNode, psCacheOpWorkItem, uiTimeline); |
| PVR_LOGG_IF_ERROR(eError, "CacheOpTimelineBind", e0); |
| |
| /* Prepare & enqueue next deferred work item for CacheOp thread */ |
| *pui32NextFenceSeqNum = CacheOpGetNextCommonSeqNum(); |
| psCacheOpWorkItem->ui32OpSeqNum = *pui32NextFenceSeqNum; |
| psCacheOpWorkItem->ui32GFSeqNum = ui32GlobalFlushSeqNum; |
| psCacheOpWorkItem->uiCacheOp = PVRSRV_CACHE_OP_GLOBAL; |
| psCacheOpWorkItem->uiOffset = (IMG_DEVMEM_OFFSET_T)0; |
| psCacheOpWorkItem->uiSize = (IMG_DEVMEM_SIZE_T)0; |
| #if defined(CACHEOP_DEBUG) |
| /* Note client pid & queueing time of deferred GF CacheOp */ |
| psCacheOpWorkItem->ui64EnqueuedTime = OSClockns64(); |
| psCacheOpWorkItem->pid = sCacheOpWorkItem.pid; |
| OSAtomicAdd(&gsCwq.hDeferredSize, uiTotalSize); |
| psCacheOpWorkItem->uiSize = uiTotalSize; |
| psCacheOpWorkItem->bDeferred = IMG_TRUE; |
| psCacheOpWorkItem->bKMReq = IMG_FALSE; |
| psCacheOpWorkItem->bUMF = IMG_FALSE; |
| /* Client CacheOp is logged using the deferred seqNum */ |
| sCacheOpWorkItem.ui32OpSeqNum = *pui32NextFenceSeqNum; |
| sCacheOpWorkItem.ui64EnqueuedTime = psCacheOpWorkItem->ui64EnqueuedTime; |
| sCacheOpWorkItem.ui64ExecuteTime = psCacheOpWorkItem->ui64EnqueuedTime; |
| /* Update the CacheOp statistics */ |
| gsCwq.ui32ServerASync += 1; |
| gsCwq.ui32ServerDGF += 1; |
| #endif |
| |
| /* Mark index ready for cache maintenance */ |
| (void) CacheOpIdxIncrement(&gsCwq.hWriteCounter); |
| |
| OSLockRelease(gsCwq.hDeferredLock); |
| |
| /* Signal the CacheOp thread to ensure this GF gets processed */ |
| eError = OSEventObjectSignal(gsCwq.hThreadWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectSignal"); |
| |
| exec_timeline: |
| if (bUseGlobalFlush && bBatchHasTimeline) |
| { |
| eError = CacheOpBatchExecTimeline(psDevNode, uiTimeline, bUseGlobalFlush, |
| uiCurrentFenceSeqNum, pui32NextFenceSeqNum); |
| } |
| |
| #if defined(CACHEOP_DEBUG) |
| for (ui32Idx = 0; ui32Idx < ui32NumCacheOps; ui32Idx++) |
| { |
| sCacheOpWorkItem.psPMR = ppsPMR[ui32Idx]; |
| sCacheOpWorkItem.uiSize = puiSize[ui32Idx]; |
| sCacheOpWorkItem.uiOffset = puiOffset[ui32Idx]; |
| sCacheOpWorkItem.uiCacheOp = puiCacheOp[ui32Idx]; |
| if (bUseGlobalFlush) |
| { |
| if (sCacheOpWorkItem.ui64ExecuteTime && ui32Idx) |
| { |
| /* Only first item carries the real execution time, rest are discards */ |
| sCacheOpWorkItem.ui64EnqueuedTime = sCacheOpWorkItem.ui64ExecuteTime; |
| } |
| gsCwq.ui32KMDiscards += !sCacheOpWorkItem.ui64ExecuteTime ? 1 : ui32Idx ? 1 : 0; |
| } |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| } |
| #endif |
| |
| return eError; |
| e0: |
| if (psCacheOpWorkItem) |
| { |
| /* Need to ensure we leave this CacheOp QItem in the proper recycled state */ |
| CacheOpQItemRecycle(psCacheOpWorkItem); |
| OSLockRelease(gsCwq.hDeferredLock); |
| } |
| |
| if (bBatchHasTimeline) |
| { |
| PVRSRV_ERROR eError2; |
| eError2 = CacheOpBatchExecTimeline(psDevNode, uiTimeline, IMG_FALSE, |
| uiCurrentFenceSeqNum, pui32NextFenceSeqNum); |
| eError = (eError2 == PVRSRV_ERROR_RETRY) ? eError2 : eError; |
| } |
| |
| return eError; |
| } |
| |
| PVRSRV_ERROR CacheOpExec (PPVRSRV_DEVICE_NODE psDevNode, |
| void *pvVirtStart, |
| void *pvVirtEnd, |
| IMG_CPU_PHYADDR sCPUPhysStart, |
| IMG_CPU_PHYADDR sCPUPhysEnd, |
| PVRSRV_CACHE_OP uiCacheOp) |
| { |
| PVRSRV_ERROR eError = PVRSRV_ERROR_RETRY; |
| #if defined(CACHEOP_DEBUG) |
| IMG_BOOL bUsedGlobalFlush = IMG_FALSE; |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = {0}; |
| sCacheOpWorkItem.ui64EnqueuedTime = OSClockns64(); |
| #endif |
| |
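| /* Try a GF first for ranges above the KM GF threshold; eError remains PVRSRV_ERROR_RETRY if the GF is skipped or fails, which routes us onto the range-based path below */ |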
| if (gsCwq.bInit) |
| { |
| IMG_DEVMEM_SIZE_T uiSize = sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr; |
| if ((IMG_UINT32)uiSize > gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD]) |
| { |
| eError = CacheOpGlobalFlush(); |
| } |
| } |
| |
| if (eError == PVRSRV_OK) |
| { |
| #if defined(CACHEOP_DEBUG) |
| bUsedGlobalFlush = IMG_TRUE; |
| #endif |
| } |
| else |
| { |
| switch (uiCacheOp) |
| { |
| case PVRSRV_CACHE_OP_CLEAN: |
| OSCPUCacheCleanRangeKM(psDevNode, pvVirtStart, pvVirtEnd, sCPUPhysStart, sCPUPhysEnd); |
| break; |
| case PVRSRV_CACHE_OP_INVALIDATE: |
| OSCPUCacheInvalidateRangeKM(psDevNode, pvVirtStart, pvVirtEnd, sCPUPhysStart, sCPUPhysEnd); |
| break; |
| case PVRSRV_CACHE_OP_FLUSH: |
| OSCPUCacheFlushRangeKM(psDevNode, pvVirtStart, pvVirtEnd, sCPUPhysStart, sCPUPhysEnd); |
| break; |
| default: |
| PVR_DPF((PVR_DBG_ERROR, "%s: Invalid cache operation type %d", |
| __func__, uiCacheOp)); |
| break; |
| } |
| eError = PVRSRV_OK; |
| } |
| |
| #if defined(CACHEOP_DEBUG) |
| if (! CacheOpConfigSupports(CACHEOP_CONFIG_KLOG)) |
| { |
| if (bUsedGlobalFlush) |
| { |
| /* Undo the accounting for server GF done in CacheOpGlobalFlush() */ |
| gsCwq.ui32ServerGF -= 1; |
| } |
| } |
| else |
| { |
| gsCwq.ui32TotalExecOps += 1; |
| if (! bUsedGlobalFlush) |
| { |
| gsCwq.ui32ServerSync += 1; |
| gsCwq.ui32ServerRBF += |
| ((sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr) & ((IMG_DEVMEM_SIZE_T)~(gsCwq.uiLineSize - 1))) >> gsCwq.uiLineShift; |
| } |
| sCacheOpWorkItem.uiOffset = 0; |
| sCacheOpWorkItem.bKMReq = IMG_TRUE; |
| sCacheOpWorkItem.uiCacheOp = uiCacheOp; |
| sCacheOpWorkItem.bRBF = !bUsedGlobalFlush; |
| /* Use information page PMR for logging KM request */ |
| sCacheOpWorkItem.psPMR = gsCwq.psInfoPagePMR; |
| sCacheOpWorkItem.ui64ExecuteTime = OSClockns64(); |
| sCacheOpWorkItem.pid = OSGetCurrentClientProcessIDKM(); |
| sCacheOpWorkItem.ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| sCacheOpWorkItem.uiSize = (sCPUPhysEnd.uiAddr - sCPUPhysStart.uiAddr); |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| } |
| #endif |
| |
| return eError; |
| } |
| |
| PVRSRV_ERROR CacheOpValExec(PMR *psPMR, |
| IMG_UINT64 uiAddress, |
| IMG_DEVMEM_OFFSET_T uiOffset, |
| IMG_DEVMEM_SIZE_T uiSize, |
| PVRSRV_CACHE_OP uiCacheOp) |
| { |
| PVRSRV_ERROR eError; |
| IMG_CPU_VIRTADDR pvAddress = (IMG_CPU_VIRTADDR)(uintptr_t)uiAddress; |
| IMG_BOOL bUseGlobalFlush = (IMG_UINT32)uiSize > gsCwq.pui32InfoPage[CACHEOP_INFO_KMGFTHRESHLD]; |
| #if defined(CACHEOP_DEBUG) |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = {0}; |
| gsCwq.ui32TotalExecOps += 1; |
| gsCwq.ui32ServerSync += 1; |
| sCacheOpWorkItem.psPMR = psPMR; |
| sCacheOpWorkItem.uiSize = uiSize; |
| sCacheOpWorkItem.uiOffset = uiOffset; |
| sCacheOpWorkItem.uiCacheOp = uiCacheOp; |
| sCacheOpWorkItem.pid = OSGetCurrentClientProcessIDKM(); |
| sCacheOpWorkItem.ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| sCacheOpWorkItem.ui64EnqueuedTime = OSClockns64(); |
| #endif |
| |
| eError = CacheOpPMRExec(psPMR, |
| pvAddress, |
| uiOffset, |
| uiSize, |
| uiCacheOp, |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0], |
| IMG_FALSE, |
| &bUseGlobalFlush); |
| PVR_LOGG_IF_ERROR(eError, "CacheOpPMRExec", e0); |
| |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.bRBF = !bUseGlobalFlush; |
| sCacheOpWorkItem.ui64ExecuteTime = OSClockns64(); |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| #endif |
| |
| e0: |
| return eError; |
| } |
| |
| PVRSRV_ERROR CacheOpQueue (CONNECTION_DATA *psConnection, |
| PVRSRV_DEVICE_NODE *psDevNode, |
| IMG_UINT32 ui32NumCacheOps, |
| PMR **ppsPMR, |
| IMG_UINT64 *puiAddress, |
| IMG_DEVMEM_OFFSET_T *puiOffset, |
| IMG_DEVMEM_SIZE_T *puiSize, |
| PVRSRV_CACHE_OP *puiCacheOp, |
| IMG_UINT32 ui32OpTimeline, |
| IMG_UINT32 ui32ClientGFSeqNum, |
| IMG_UINT32 uiCurrentFenceSeqNum, |
| IMG_UINT32 *pui32NextFenceSeqNum) |
| { |
| PVRSRV_ERROR eError; |
| PVRSRV_TIMELINE uiTimeline = (PVRSRV_TIMELINE)ui32OpTimeline; |
| IMG_CPU_VIRTADDR *pvAddress = (IMG_CPU_VIRTADDR*)(uintptr_t)puiAddress; |
| |
| PVR_UNREFERENCED_PARAMETER(psConnection); |
| |
| #if !defined(CACHEFLUSH_ISA_SUPPORTS_GLOBAL_FLUSH) |
| PVR_LOGR_IF_FALSE((ui32ClientGFSeqNum == 0), |
| "CacheOpQueue(ui32ClientGFSeqNum > 0)", |
| PVRSRV_ERROR_INVALID_PARAMS); |
| #endif |
| #if defined(CACHEOP_DEBUG) |
| gsCwq.ui32TotalExecOps += ui32NumCacheOps; |
| #endif |
| |
| if (! gsCwq.bInit) |
| { |
| PVR_LOG(("CacheOp framework not initialised, failing request")); |
| return PVRSRV_ERROR_NOT_INITIALISED; |
| } |
| else if (! ui32NumCacheOps) |
| { |
| return PVRSRV_ERROR_INVALID_PARAMS; |
| } |
| /* Ensure any single timeline CacheOp request is processed immediately */ |
| else if (ui32NumCacheOps == 1 && puiCacheOp[0] == PVRSRV_CACHE_OP_TIMELINE) |
| { |
| eError = CacheOpBatchExecTimeline(psDevNode, uiTimeline, IMG_TRUE, uiCurrentFenceSeqNum, pui32NextFenceSeqNum); |
| } |
| /* Services client explicitly requested a GF, or the config is GF-only (i.e. no KRBF support); this takes priority */ |
| else if (CacheOpConfigSupports(CACHEOP_CONFIG_KGF) && |
| ((puiCacheOp[ui32NumCacheOps-1] & PVRSRV_CACHE_OP_GLOBAL) || !CacheOpConfigSupports(CACHEOP_CONFIG_KRBF))) |
| { |
| eError = |
| CacheOpBatchExecGlobal(psDevNode, |
| ppsPMR, |
| pvAddress, |
| puiOffset, |
| puiSize, |
| puiCacheOp, |
| ui32NumCacheOps, |
| uiTimeline, |
| ui32ClientGFSeqNum, |
| uiCurrentFenceSeqNum, |
| pui32NextFenceSeqNum); |
| } |
| /* This is the default entry for all client requests */ |
| else |
| { |
| if (!(gsCwq.eConfig & (CACHEOP_CONFIG_LAST-1))) |
| { |
| /* default the configuration before execution */ |
| CacheOpConfigUpdate(CACHEOP_CONFIG_DEFAULT); |
| } |
| |
| eError = |
| CacheOpBatchExecRangeBased(psDevNode, |
| ppsPMR, |
| pvAddress, |
| puiOffset, |
| puiSize, |
| puiCacheOp, |
| ui32NumCacheOps, |
| uiTimeline, |
| ui32ClientGFSeqNum, |
| uiCurrentFenceSeqNum, |
| pui32NextFenceSeqNum); |
| } |
| |
| return eError; |
| } |
| |
| PVRSRV_ERROR CacheOpFence (RGXFWIF_DM eFenceOpType, IMG_UINT32 ui32FenceOpSeqNum) |
| { |
| IMG_HANDLE hOSEvent; |
| PVRSRV_ERROR eError2; |
| IMG_UINT32 ui32RetryAbort; |
| IMG_UINT32 ui32CompletedOpSeqNum; |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| #if defined(CACHEOP_DEBUG) |
| IMG_UINT64 uiTimeNow; |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = {0}; |
| sCacheOpWorkItem.pid = OSGetCurrentClientProcessIDKM(); |
| sCacheOpWorkItem.ui32OpSeqNum = ui32FenceOpSeqNum; |
| sCacheOpWorkItem.ui64EnqueuedTime = OSClockns64(); |
| uiTimeNow = sCacheOpWorkItem.ui64EnqueuedTime; |
| #if defined(PVRSRV_ENABLE_GPU_MEMORY_INFO) && defined(DEBUG) |
| sCacheOpWorkItem.eFenceOpType = eFenceOpType; |
| #endif |
| sCacheOpWorkItem.uiSize = (uintptr_t) OSAtomicRead(&gsCwq.hCompletedSeqNum); |
| sCacheOpWorkItem.uiOffset = (uintptr_t) gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| #endif |
| PVR_UNREFERENCED_PARAMETER(eFenceOpType); |
| |
| /* The CacheOp(s) this thread is fencing for have already been satisfied by an |
| else-when GF. Put another way, if the last else-when GF is logically behind, |
| or momentarily disabled (zero), then we still have to flush |
| the cache */ |
| if (gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] > ui32FenceOpSeqNum) |
| { |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.uiOffset = (uintptr_t) gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| #endif |
| goto e0; |
| } |
| |
| /* If initial fence check fails, then wait-and-retry in loop */ |
| ui32CompletedOpSeqNum = OSAtomicRead(&gsCwq.hCompletedSeqNum); |
| if (CacheOpFenceCheck(ui32CompletedOpSeqNum, ui32FenceOpSeqNum)) |
| { |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.uiSize = (uintptr_t) ui32CompletedOpSeqNum; |
| #endif |
| goto e0; |
| } |
| |
| /* Open CacheOp update event object, if event open fails return error */ |
| eError2 = OSEventObjectOpen(gsCwq.hClientWakeUpEvtObj, &hOSEvent); |
| PVR_LOGG_IF_ERROR(eError2, "OSEventObjectOpen", e0); |
| |
| /* Linear back-off (exponential might be preferable); upper-bounds the user wait */ |
| for (ui32RetryAbort = gsCwq.ui32FenceRetryAbort; ;--ui32RetryAbort) |
| { |
| /* (Re)read completed CacheOp sequence number before waiting */ |
| ui32CompletedOpSeqNum = OSAtomicRead(&gsCwq.hCompletedSeqNum); |
| if (CacheOpFenceCheck(ui32CompletedOpSeqNum, ui32FenceOpSeqNum)) |
| { |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.uiSize = (uintptr_t) ui32CompletedOpSeqNum; |
| #endif |
| break; |
| } |
| |
| /* |
| For a cache ISA with GF support, the wait (ms) must be set to around |
| 25% of the GF overhead; there is no point waiting longer than that, we |
| just perform a GF as it means the CacheOp thread is really lagging behind. |
| Lastly, we cannot (or should not) hang the client thread indefinitely, |
| so after a certain duration we just give up. What this duration should be |
| is hard to state, but for now we set it to 1 second, which is the |
| product of CACHEOP_FENCE_[WAIT_TIMEOUT * RETRY_ABORT]. We ask the |
| client to retry the operation by exiting with PVRSRV_ERROR_RETRY. |
| */ |
| (void) OSEventObjectWaitTimeout(hOSEvent, gsCwq.ui32FenceWaitTimeUs); |
| if (gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] > ui32FenceOpSeqNum) |
| { |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.uiOffset = (uintptr_t) gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| uiTimeNow = OSClockns64(); |
| #endif |
| break; |
| } |
| else if (CacheOpConfigSupports(CACHEOP_CONFIG_KGF)) |
| { |
| eError2 = CacheOpGlobalFlush(); |
| PVR_LOG_IF_ERROR(eError2, "CacheOpGlobalFlush"); |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.uiCacheOp = PVRSRV_CACHE_OP_GLOBAL; |
| sCacheOpWorkItem.uiOffset = (uintptr_t) gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| uiTimeNow = OSClockns64(); |
| #endif |
| break; |
| } |
| else if (! ui32RetryAbort) |
| { |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.uiSize = (uintptr_t) OSAtomicRead(&gsCwq.hCompletedSeqNum); |
| sCacheOpWorkItem.uiOffset = (uintptr_t) gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0]; |
| uiTimeNow = OSClockns64(); |
| #endif |
| PVR_LOG(("CacheOpFence() event: "CACHEOP_ABORT_FENCE_ERROR_STRING)); |
| eError = PVRSRV_ERROR_RETRY; |
| break; |
| } |
| else |
| { |
| #if defined(CACHEOP_DEBUG) |
| uiTimeNow = OSClockns64(); |
| #endif |
| } |
| } |
| |
| eError2 = OSEventObjectClose(hOSEvent); |
| PVR_LOG_IF_ERROR(eError2, "OSEventObjectClose"); |
| |
| e0: |
| #if defined(CACHEOP_DEBUG) |
| sCacheOpWorkItem.ui64ExecuteTime = uiTimeNow; |
| if (ui32FenceOpSeqNum) |
| { |
| /* Only fence(s) pending on CacheOp(s) contribute towards statistics; here we |
| calculate an approximate rolling average of the waiting time for these |
| fence(s) */ |
| /* Keep the nanosecond timestamps in 64-bit and compute the delta before converting to microseconds */ |
| IMG_UINT64 ui64EnqueuedTime = sCacheOpWorkItem.ui64EnqueuedTime; |
| IMG_UINT64 ui64ExecuteTime = sCacheOpWorkItem.ui64ExecuteTime; |
| IMG_UINT64 ui64Time = ui64EnqueuedTime < ui64ExecuteTime ? |
| ui64ExecuteTime - ui64EnqueuedTime : |
| ui64EnqueuedTime - ui64ExecuteTime; |
| IMG_UINT32 ui32Time = (IMG_UINT32) DivBy10(DivBy10(DivBy10(ui64Time))); |
| gsCwq.ui32TotalFenceOps += 1; |
| if (gsCwq.ui32TotalFenceOps > 2) |
| { |
| gsCwq.ui32AvgFenceTime -= (gsCwq.ui32AvgFenceTime / gsCwq.ui32TotalFenceOps); |
| gsCwq.ui32AvgFenceTime += (ui32Time / gsCwq.ui32TotalFenceOps); |
| } |
| else if (ui32Time) |
| { |
| gsCwq.ui32AvgFenceTime = (IMG_UINT32)ui32Time; |
| } |
| } |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| #endif |
| |
| return eError; |
| } |
| |
| PVRSRV_ERROR CacheOpLog (PMR *psPMR, |
| IMG_UINT64 puiAddress, |
| IMG_DEVMEM_OFFSET_T uiOffset, |
| IMG_DEVMEM_SIZE_T uiSize, |
| IMG_UINT64 ui64EnqueuedTimeUs, |
| IMG_UINT64 ui64ExecuteTimeUs, |
| IMG_UINT32 ui32NumRBF, |
| IMG_BOOL bIsDiscard, |
| PVRSRV_CACHE_OP uiCacheOp) |
| { |
| #if defined(CACHEOP_DEBUG) |
| CACHEOP_WORK_ITEM sCacheOpWorkItem = {0}; |
| PVR_UNREFERENCED_PARAMETER(puiAddress); |
| |
| sCacheOpWorkItem.psPMR = psPMR; |
| sCacheOpWorkItem.uiSize = uiSize; |
| sCacheOpWorkItem.uiOffset = uiOffset; |
| sCacheOpWorkItem.uiCacheOp = uiCacheOp; |
| sCacheOpWorkItem.pid = OSGetCurrentClientProcessIDKM(); |
| sCacheOpWorkItem.ui32OpSeqNum = CacheOpGetNextCommonSeqNum(); |
| |
| sCacheOpWorkItem.ui64EnqueuedTime = ui64EnqueuedTimeUs; |
| sCacheOpWorkItem.ui64ExecuteTime = ui64ExecuteTimeUs; |
| sCacheOpWorkItem.bUMF = IMG_TRUE; |
| sCacheOpWorkItem.bRBF = bIsDiscard ? IMG_FALSE : IMG_TRUE; |
| gsCwq.ui32UMDiscards += bIsDiscard ? 1 : 0; |
| gsCwq.ui32ClientRBF += bIsDiscard ? 0 : ui32NumRBF; |
| gsCwq.ui32ClientSync += 1; |
| gsCwq.ui32TotalExecOps += 1; |
| |
| CacheOpStatsExecLogWrite(&sCacheOpWorkItem); |
| #else |
| PVR_UNREFERENCED_PARAMETER(psPMR); |
| PVR_UNREFERENCED_PARAMETER(uiSize); |
| PVR_UNREFERENCED_PARAMETER(uiOffset); |
| PVR_UNREFERENCED_PARAMETER(uiCacheOp); |
| PVR_UNREFERENCED_PARAMETER(ui32NumRBF); |
| PVR_UNREFERENCED_PARAMETER(puiAddress); |
| PVR_UNREFERENCED_PARAMETER(ui64ExecuteTimeUs); |
| PVR_UNREFERENCED_PARAMETER(ui64EnqueuedTimeUs); |
| #endif |
| return PVRSRV_OK; |
| } |
| |
| PVRSRV_ERROR CacheOpInit2 (void) |
| { |
| PVRSRV_ERROR eError; |
| PVRSRV_DATA *psPVRSRVData = PVRSRVGetPVRSRVData(); |
| |
| /* Create an event object for pending CacheOp work items */ |
| eError = OSEventObjectCreate("PVRSRV_CACHEOP_EVENTOBJECT", &gsCwq.hThreadWakeUpEvtObj); |
| PVR_LOGG_IF_ERROR(eError, "OSEventObjectCreate", e0); |
| |
| /* Create an event object for updating pending fence checks on CacheOp */ |
| eError = OSEventObjectCreate("PVRSRV_CACHEOP_EVENTOBJECT", &gsCwq.hClientWakeUpEvtObj); |
| PVR_LOGG_IF_ERROR(eError, "OSEventObjectCreate", e0); |
| |
| /* Appending work-items is not concurrent, lock protects against this */ |
| eError = OSLockCreate((POS_LOCK*)&gsCwq.hDeferredLock); |
| PVR_LOGG_IF_ERROR(eError, "OSLockCreate", e0); |
| |
| /* Apphint read/write is not concurrent, so lock protects against this */ |
| eError = OSLockCreate((POS_LOCK*)&gsCwq.hConfigLock); |
| PVR_LOGG_IF_ERROR(eError, "OSLockCreate", e0); |
| |
| /* Determine CPU cache ISA maintenance mechanism available, GF and UMF */ |
| #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) |
| gsCwq.bNoGlobalFlushImpl = IMG_TRUE; |
| #else |
| gsCwq.bNoGlobalFlushImpl = (OSCPUOperation(PVRSRV_CACHE_OP_FLUSH) != PVRSRV_OK) ? IMG_TRUE : IMG_FALSE; |
| #endif |
| if (! gsCwq.bNoGlobalFlushImpl) |
| { |
| IMG_UINT64 uiIdx; |
| IMG_UINT64 uiTime = 0; |
| IMG_UINT64 uiTimeAfter; |
| IMG_UINT64 uiTimeBefore; |
| |
| for (uiIdx = 0; uiIdx < 4; uiIdx++) |
| { |
| /* Take average of four GF */ |
| uiTimeBefore = OSClockns64(); |
| (void) OSCPUOperation(PVRSRV_CACHE_OP_FLUSH); |
| uiTimeAfter = OSClockns64(); |
| |
| uiTimeBefore = DivBy10(DivBy10(DivBy10(uiTimeBefore))); |
| uiTimeAfter = DivBy10(DivBy10(DivBy10(uiTimeAfter))); |
| uiTime += uiTimeBefore < uiTimeAfter ? |
| uiTimeAfter - uiTimeBefore : |
| uiTimeBefore - uiTimeAfter; |
| } |
| |
| gsCwq.ui32FenceWaitTimeUs = (IMG_UINT32)(uiTime >> 2); |
| gsCwq.ui32FenceRetryAbort = ~0; |
| } |
| else |
| { |
| gsCwq.ui32FenceWaitTimeUs = CACHEOP_FENCE_WAIT_TIMEOUT; |
| gsCwq.ui32FenceRetryAbort = CACHEOP_FENCE_RETRY_ABORT; |
| } |
| #if defined(CACHEFLUSH_ISA_SUPPORTS_UM_FLUSH) |
| gsCwq.bSupportsUMFlush = IMG_TRUE; |
| #else |
| gsCwq.bSupportsUMFlush = IMG_FALSE; |
| #endif |
| |
| gsCwq.pui32InfoPage = psPVRSRVData->pui32InfoPage; |
| gsCwq.psInfoPagePMR = psPVRSRVData->psInfoPagePMR; |
| |
| /* Normally, platforms should use their default configurations, put exceptions here */ |
| #if defined(__i386__) || defined(__x86_64__) |
| #if !defined(TC_MEMORY_CONFIG) |
| CacheOpConfigUpdate(CACHEOP_CONFIG_URBF | CACHEOP_CONFIG_KGF | CACHEOP_CONFIG_KDF); |
| #else |
| CacheOpConfigUpdate(CACHEOP_CONFIG_KGF | CACHEOP_CONFIG_KDF); |
| #endif |
#else /* !defined(__i386__) && !defined(__x86_64__) */
| CacheOpConfigUpdate(CACHEOP_CONFIG_DEFAULT); |
| #endif |
| |
| /* Initialise the remaining occupants of the CacheOp information page */ |
| gsCwq.pui32InfoPage[CACHEOP_INFO_PGSIZE] = (IMG_UINT32)gsCwq.uiPageSize; |
| gsCwq.pui32InfoPage[CACHEOP_INFO_LINESIZE] = (IMG_UINT32)gsCwq.uiLineSize; |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM0] = (IMG_UINT32)0; |
| gsCwq.pui32InfoPage[CACHEOP_INFO_GFSEQNUM1] = (IMG_UINT32)0; |
| |
| /* Set before spawning thread */ |
| gsCwq.bInit = IMG_TRUE; |
| |
	/* Create a thread which is used to execute the deferred CacheOp(s);
	   these are CacheOp(s) executed asynchronously by the server on behalf
	   of clients. All clients synchronise with the server before submitting
	   any HW operation (i.e. device kicks) to ensure that client device
	   work-load memory is coherent */
| eError = OSThreadCreatePriority(&gsCwq.hWorkerThread, |
| "pvr_cacheop", |
| CacheOpThread, |
| CacheOpThreadDumpInfo, |
| IMG_TRUE, |
| psPVRSRVData, |
| OS_THREAD_HIGHEST_PRIORITY); |
| PVR_LOGG_IF_ERROR(eError, "OSThreadCreatePriority", e0); |
| |
	/* Writing the unsigned integer binary encoding of a CACHEOP_CONFIG
	   value into this file cycles through the available configuration(s) */
| gsCwq.pvConfigTune = OSCreateStatisticEntry("cacheop_config", |
| NULL, |
| CacheOpConfigRead, |
| NULL); |
| PVR_LOGG_IF_FALSE(gsCwq.pvConfigTune, "OSCreateStatisticEntry", e0); |
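
	/* For example (purely illustrative; the exact debugfs path and the bit
	   encoding of each CACHEOP_CONFIG_* flag are build/OS dependent):
	       echo <config-bits> > .../pvr/cacheop_config
	   requests a switch to the configuration encoded by <config-bits>,
	   subject to the capabilities detected above. */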
| |
| /* Register the CacheOp framework (re)configuration handlers */ |
| PVRSRVAppHintRegisterHandlersUINT32(APPHINT_ID_CacheOpConfig, |
| CacheOpConfigQuery, |
| CacheOpConfigSet, |
| APPHINT_OF_DRIVER_NO_DEVICE, |
| (void *) APPHINT_ID_CacheOpConfig); |
| |
| PVRSRVAppHintRegisterHandlersUINT32(APPHINT_ID_CacheOpGFThresholdSize, |
| CacheOpConfigQuery, |
| CacheOpConfigSet, |
| APPHINT_OF_DRIVER_NO_DEVICE, |
| (void *) APPHINT_ID_CacheOpGFThresholdSize); |
| |
| PVRSRVAppHintRegisterHandlersUINT32(APPHINT_ID_CacheOpUMKMThresholdSize, |
| CacheOpConfigQuery, |
| CacheOpConfigSet, |
| APPHINT_OF_DRIVER_NO_DEVICE, |
| (void *) APPHINT_ID_CacheOpUMKMThresholdSize); |
| |
| return PVRSRV_OK; |
| e0: |
| CacheOpDeInit2(); |
| return eError; |
| } |
| |
| void CacheOpDeInit2 (void) |
| { |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| |
| gsCwq.bInit = IMG_FALSE; |
| |
| if (gsCwq.hThreadWakeUpEvtObj) |
| { |
| eError = OSEventObjectSignal(gsCwq.hThreadWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectSignal"); |
| } |
| |
| if (gsCwq.hClientWakeUpEvtObj) |
| { |
| eError = OSEventObjectSignal(gsCwq.hClientWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectSignal"); |
| } |
| |
| if (gsCwq.hWorkerThread) |
| { |
| LOOP_UNTIL_TIMEOUT(OS_THREAD_DESTROY_TIMEOUT_US) |
| { |
| eError = OSThreadDestroy(gsCwq.hWorkerThread); |
| if (PVRSRV_OK == eError) |
| { |
| gsCwq.hWorkerThread = NULL; |
| break; |
| } |
| OSWaitus(OS_THREAD_DESTROY_TIMEOUT_US/OS_THREAD_DESTROY_RETRY_COUNT); |
| } END_LOOP_UNTIL_TIMEOUT(); |
| PVR_LOG_IF_ERROR(eError, "OSThreadDestroy"); |
| gsCwq.hWorkerThread = NULL; |
| } |
| |
| if (gsCwq.hClientWakeUpEvtObj) |
| { |
| eError = OSEventObjectDestroy(gsCwq.hClientWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectDestroy"); |
| gsCwq.hClientWakeUpEvtObj = NULL; |
| } |
| |
| if (gsCwq.hThreadWakeUpEvtObj) |
| { |
| eError = OSEventObjectDestroy(gsCwq.hThreadWakeUpEvtObj); |
| PVR_LOG_IF_ERROR(eError, "OSEventObjectDestroy"); |
| gsCwq.hThreadWakeUpEvtObj = NULL; |
| } |
| |
| if (gsCwq.hConfigLock) |
| { |
| eError = OSLockDestroy(gsCwq.hConfigLock); |
| PVR_LOG_IF_ERROR(eError, "OSLockDestroy"); |
| gsCwq.hConfigLock = NULL; |
| } |
| |
| if (gsCwq.hDeferredLock) |
| { |
| eError = OSLockDestroy(gsCwq.hDeferredLock); |
| PVR_LOG_IF_ERROR(eError, "OSLockDestroy"); |
| gsCwq.hDeferredLock = NULL; |
| } |
| |
| if (gsCwq.pvConfigTune) |
| { |
| OSRemoveStatisticEntry(&gsCwq.pvConfigTune); |
| } |
| |
| gsCwq.pui32InfoPage = NULL; |
| gsCwq.psInfoPagePMR = NULL; |
| } |
| |
| PVRSRV_ERROR CacheOpInit (void) |
| { |
| IMG_UINT32 idx; |
| PVRSRV_ERROR eError = PVRSRV_OK; |
| |
	/* DDK initialisation is expected to run on the boot processor (the
	   little core in big/little systems), though this may not always be
	   the case. Either way, the value cached here must be the system-wide
	   safe (i.e. smallest) L1 d-cache line size on platforms with
	   mismatched d-cache line sizes */
| gsCwq.uiPageSize = OSGetPageSize(); |
| gsCwq.uiPageShift = OSGetPageShift(); |
| gsCwq.uiLineSize = OSCPUCacheAttributeSize(PVR_DCACHE_LINE_SIZE); |
| gsCwq.uiLineShift = ExactLog2(gsCwq.uiLineSize); |
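	/* e.g. a 64-byte d-cache line gives uiLineShift == 6, so an address
	   can be aligned down to the start of its cache line with
	   (addr >> 6) << 6, equivalently addr & ~(uiLineSize - 1) */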
	PVR_LOGR_IF_FALSE((gsCwq.uiLineSize && gsCwq.uiPageSize && gsCwq.uiPageShift), "CPU cache attributes", PVRSRV_ERROR_INIT_FAILURE);
| gsCwq.uiCacheOpAddrType = OSCPUCacheOpAddressType(); |
| |
| /* More information regarding these atomic counters can be found |
| in the CACHEOP_WORK_QUEUE type definition at top of file */ |
| OSAtomicWrite(&gsCwq.hCompletedSeqNum, 0); |
| OSAtomicWrite(&gsCwq.hCommonSeqNum, 0); |
| OSAtomicWrite(&gsCwq.hDeferredSize, 0); |
| OSAtomicWrite(&gsCwq.hWriteCounter, 0); |
| OSAtomicWrite(&gsCwq.hReadCounter, 0); |
| |
| for (idx = 0; idx < CACHEOP_INDICES_MAX; idx++) |
| { |
| gsCwq.asWorkItems[idx].iTimeline = PVRSRV_NO_TIMELINE; |
| gsCwq.asWorkItems[idx].psPMR = (void *)(uintptr_t)~0; |
| gsCwq.asWorkItems[idx].ui32OpSeqNum = (IMG_UINT32)~0; |
| gsCwq.asWorkItems[idx].ui32GFSeqNum = (IMG_UINT32)~0; |
| } |
| |
	/* Lock prevents multiple threads from issuing more global flushes (GF)
	   than are required */
| eError = OSLockCreate((POS_LOCK*)&gsCwq.hGlobalFlushLock); |
| PVR_LOGG_IF_ERROR(eError, "OSLockCreate", e0); |
| |
| #if defined(CACHEOP_DEBUG) |
| /* debugfs file read-out is not concurrent, so lock protects against this */ |
| eError = OSLockCreate((POS_LOCK*)&gsCwq.hStatsExecLock); |
| PVR_LOGG_IF_ERROR(eError, "OSLockCreate", e0); |
| |
| gsCwq.i32StatsExecWriteIdx = 0; |
| OSCachedMemSet(gsCwq.asStatsExecuted, 0, sizeof(gsCwq.asStatsExecuted)); |
| |
| /* File captures the most recent subset of CacheOp(s) executed */ |
| gsCwq.pvStatsEntry = OSCreateStatisticEntry("cacheop_history", |
| NULL, |
| CacheOpStatsExecLogRead, |
| NULL); |
	PVR_LOGG_IF_FALSE(gsCwq.pvStatsEntry, "OSCreateStatisticEntry", e0);
| #endif |
| |
| e0: |
| return eError; |
| } |
| |
| void CacheOpDeInit (void) |
| { |
| #if defined(CACHEOP_DEBUG) |
| if (gsCwq.hStatsExecLock) |
| { |
| (void) OSLockDestroy(gsCwq.hStatsExecLock); |
| gsCwq.hStatsExecLock = NULL; |
| } |
| |
| if (gsCwq.pvStatsEntry) |
| { |
| OSRemoveStatisticEntry(&gsCwq.pvStatsEntry); |
| } |
| #endif |
| if (gsCwq.hGlobalFlushLock) |
| { |
| (void) OSLockDestroy(gsCwq.hGlobalFlushLock); |
| gsCwq.hGlobalFlushLock = NULL; |
| } |
| } |