drm/amdgpu: invalidate only the currently needed VMHUB v2

Drop invalidating both hubs from each engine.

v2: don't use hardcoded values

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b542922..dc3a214 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1034,42 +1034,38 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
+	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
+	data1 = upper_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
 
-		data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
-		data1 = upper_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
 
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
 
-		data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
-		data1 = lower_32_bits(pd_addr);
-		mask = 0xffffffff;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
+	/* flush TLB */
+	data0 = (hub->vm_inv_eng0_req + eng) << 2;
+	data1 = req;
+	uvd_v7_0_vm_reg_write(ring, data0, data1);
 
-		/* flush TLB */
-		data0 = (hub->vm_inv_eng0_req + eng) << 2;
-		data1 = req;
-		uvd_v7_0_vm_reg_write(ring, data0, data1);
-
-		/* wait for flush */
-		data0 = (hub->vm_inv_eng0_ack + eng) << 2;
-		data1 = 1 << vm_id;
-		mask =  1 << vm_id;
-		uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-	}
+	/* wait for flush */
+	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
+	data1 = 1 << vm_id;
+	mask =  1 << vm_id;
+	uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
 }
 
 static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1080,44 +1076,37 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
-	unsigned i;
 
 	pd_addr = pd_addr | 0x1; /* valid bit */
 	/* now only use physical base address of PDE and valid */
 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
 
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
-		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
 
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,	(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
 
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring,
-			(hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
-		amdgpu_ring_write(ring, 0xffffffff);
-		amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+	/* flush TLB */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
 
-		/* flush TLB */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
-		amdgpu_ring_write(ring,	(hub->vm_inv_eng0_req + eng) << 2);
-		amdgpu_ring_write(ring, req);
-
-		/* wait for flush */
-		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
-		amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
-		amdgpu_ring_write(ring, 1 << vm_id);
-		amdgpu_ring_write(ring, 1 << vm_id);
-	}
+	/* wait for flush */
+	amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
 }
 
 #if 0
@@ -1455,7 +1444,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.emit_frame_size =
 		2 + /* uvd_v7_0_ring_emit_hdp_flush */
 		2 + /* uvd_v7_0_ring_emit_hdp_invalidate */
-		34 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_ring_emit_vm_flush */
+		34 + /* uvd_v7_0_ring_emit_vm_flush */
 		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
 	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
 	.emit_ib = uvd_v7_0_ring_emit_ib,
@@ -1481,7 +1470,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
 	.emit_frame_size =
-		17 * AMDGPU_MAX_VMHUBS + /* uvd_v7_0_enc_ring_emit_vm_flush */
+		17 + /* uvd_v7_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
 		1, /* uvd_v7_0_enc_ring_insert_end */
 	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */