staging: gasket: clean up extended page table map/unmap

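Use dma_sync_single_for_device() whenever the contents of an extended
page table are updated, replacing the per-entry dma_map_single() call in
the map path and the wmb() in the unmap path. This performs the cache
flush using the correct function from the DMA-API. Also move the sync
out of the inner per-slot loop, so that it is issued once per updated
range of slots, to improve performance.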

Tested: Ran run_tests with inception, testfullyconnected with parameter
caching, and rnntransducerdecoderpie107m on AT Enterprise.

Change-Id: Ie3c63113dde8cafe448d515e8970810afc9564ac
Signed-off-by: Nick Ewalt <nicholasewalt@google.com>
diff --git a/drivers/staging/gasket/gasket_page_table.c b/drivers/staging/gasket/gasket_page_table.c
index f7fd9b8..a6feb15 100644
--- a/drivers/staging/gasket/gasket_page_table.c
+++ b/drivers/staging/gasket/gasket_page_table.c
@@ -557,17 +557,10 @@
 		/* Make the DMA-space address available to the device. */
 		dma_addr = (ptes[i].dma_addr + offset) | GASKET_VALID_SLOT_FLAG;
 
-		if (is_simple_mapping) {
+		if (is_simple_mapping)
 			writeq(dma_addr, &slots[i]);
-		} else {
+		else
 			((u64 __force *)slots)[i] = dma_addr;
-			/* Extended page table vectors are in DRAM,
-			 * and so need to be synced each time they are updated.
-			 */
-			dma_map_single(pg_tbl->device,
-				       (void *)&((u64 __force *)slots)[i],
-				       sizeof(u64), DMA_TO_DEVICE);
-		}
 
 		/* Set PTE flags equal to flags param with STATUS=PTE_INUSE. */
 		ptes[i].flags = SET(FLAGS_STATUS, flags, PTE_INUSE);
@@ -639,14 +632,10 @@
 	 */
 	for (i = 0; i < num_pages; i++) {
 		/* release the address from the device, */
-		if (is_simple_mapping ||
-		    GET(FLAGS_STATUS, ptes[i].flags) == PTE_INUSE) {
+		if (is_simple_mapping)
 			writeq(0, &slots[i]);
-		} else {
+		else
 			((u64 __force *)slots)[i] = 0;
-			/* sync above PTE update before updating mappings */
-			wmb();
-		}
 
 		/* release the address from the driver, */
 		if (GET(FLAGS_STATUS, ptes[i].flags) == PTE_INUSE) {
@@ -702,6 +691,13 @@
 			gasket_perform_unmapping(pg_tbl,
 						 pte->sublevel + slot_idx,
 						 slot_base + slot_idx, len, 0);
+			/*
+			 * Extended page tables are in DRAM so they need to be
+			 * synced each time they are updated.
+			 */
+			dma_sync_single_for_device(pg_tbl->device,
+						   pte->dma_addr + slot_idx * sizeof(u64),
+						   len * sizeof(u64), DMA_TO_DEVICE);
 		}
 
 		remain -= len;
@@ -1045,6 +1041,14 @@
 			return ret;
 		}
 
+		/*
+		 * Extended page tables are in DRAM so they need to be synced
+		 * each time they are updated.
+		 */
+		dma_sync_single_for_device(pg_tbl->device,
+					   pte->dma_addr + slot_idx * sizeof(u64),
+					   len * sizeof(u64), DMA_TO_DEVICE);
+
 		remain -= len;
 		slot_idx = 0;
 		pte++;