diff options
author | James Zhu <James.Zhu@amd.com> | 2018-05-15 14:31:24 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2018-05-18 16:08:12 -0500 |
commit | 10dd74eac4dba963bfa97f5092040aa75ff742d6 (patch) | |
tree | b93a33adbbe061247ae98fb024b6100ad86407d9 /drivers | |
parent | 2bb795f5ba9cd676536858a978b9df06f473af88 (diff) |
drm/amdgpu/vg20:Restruct uvd.inst to support multiple instances
Vega20 has dual-UVD. Need add multiple instances support for uvd.
Restruct uvd.inst, using uvd.inst[0] to replace uvd.inst->.
Repurpose amdgpu_ring::me for instance index, and initialize to 0.
There are no any logical changes here.
Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 229 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 1002 |
5 files changed, 660 insertions, 590 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 1070f4042cbb..39ec6b8890a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -376,14 +376,14 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; uint64_t index; - if (ring != &adev->uvd.inst->ring) { + if (ring != &adev->uvd.inst[ring->me].ring) { ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); } else { /* put fence directly behind firmware */ index = ALIGN(adev->uvd.fw->size, 8); - ring->fence_drv.cpu_addr = adev->uvd.inst->cpu_addr + index; - ring->fence_drv.gpu_addr = adev->uvd.inst->gpu_addr + index; + ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index; + ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index; } amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); amdgpu_irq_get(adev, irq_src, irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 5620ed291107..91517b166a3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -286,7 +286,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_crtc *crtc; uint32_t ui32 = 0; uint64_t ui64 = 0; - int i, found; + int i, j, found; int ui32_size = sizeof(ui32); if (!info->return_size || !info->return_pointer) @@ -348,7 +348,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_UVD: type = AMD_IP_BLOCK_TYPE_UVD; - ring_mask = adev->uvd.inst->ring.ready ? 1 : 0; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) + ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i); ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 16; break; @@ -361,8 +362,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; - for (i = 0; i < adev->uvd.num_enc_rings; i++) - ring_mask |= ((adev->uvd.inst->ring_enc[i].ready ? 1 : 0) << i); + for (i = 0; i < adev->uvd.num_uvd_inst; i++) + for (j = 0; j < adev->uvd.num_enc_rings; j++) + ring_mask |= + ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) << + (j + i * adev->uvd.num_enc_rings)); ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 1; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 49cad08b5c16..c6850b629d0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -362,6 +362,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; + ring->me = 0; ring->adev->rings[ring->idx] = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 02683a039a98..e961492d357a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -127,7 +127,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned version_major, version_minor, family_id; - int i, r; + int i, j, r; INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler); @@ -236,28 +236,30 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst->vcpu_bo, - &adev->uvd.inst->gpu_addr, &adev->uvd.inst->cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); - return r; - } + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { - ring = &adev->uvd.inst->ring; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity, - rq, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up UVD run queue.\n"); - return r; - } + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo, + &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r); + return r; + } - for (i = 0; i < adev->uvd.max_handles; ++i) { - atomic_set(&adev->uvd.inst->handles[i], 0); - adev->uvd.inst->filp[i] = NULL; - } + ring = &adev->uvd.inst[j].ring; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity, + rq, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j); + return r; + } + for (i = 0; i < adev->uvd.max_handles; ++i) { + atomic_set(&adev->uvd.inst[j].handles[i], 0); + adev->uvd.inst[j].filp[i] = NULL; + } + } /* from uvd v5.0 HW addressing capacity increased to 64 bits */ if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0)) adev->uvd.address_64_bit = true; @@ -284,20 +286,22 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { - int i; - kfree(adev->uvd.inst->saved_bo); + int i, j; - drm_sched_entity_fini(&adev->uvd.inst->ring.sched, &adev->uvd.inst->entity); + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + kfree(adev->uvd.inst[j].saved_bo); - amdgpu_bo_free_kernel(&adev->uvd.inst->vcpu_bo, - &adev->uvd.inst->gpu_addr, - (void **)&adev->uvd.inst->cpu_addr); + drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity); - amdgpu_ring_fini(&adev->uvd.inst->ring); + amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo, + &adev->uvd.inst[j].gpu_addr, + (void **)&adev->uvd.inst[j].cpu_addr); - for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) - amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); + amdgpu_ring_fini(&adev->uvd.inst[j].ring); + for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i) + amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); + } release_firmware(adev->uvd.fw); return 0; @@ -307,32 +311,33 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) { unsigned size; void *ptr; - int i; + int i, j; - if (adev->uvd.inst->vcpu_bo == NULL) - return 0; + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + if (adev->uvd.inst[j].vcpu_bo == NULL) + continue; - cancel_delayed_work_sync(&adev->uvd.inst->idle_work); + cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work); - /* only valid for physical mode */ - if (adev->asic_type < CHIP_POLARIS10) { - for (i = 0; i < adev->uvd.max_handles; ++i) - if (atomic_read(&adev->uvd.inst->handles[i])) - break; + /* only valid for physical mode */ + if (adev->asic_type < CHIP_POLARIS10) { + for (i = 0; i < adev->uvd.max_handles; ++i) + if (atomic_read(&adev->uvd.inst[j].handles[i])) + break; - if (i == adev->uvd.max_handles) - return 0; - } - - size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo); - ptr = adev->uvd.inst->cpu_addr; + if (i == adev->uvd.max_handles) + continue; + } - adev->uvd.inst->saved_bo = kmalloc(size, GFP_KERNEL); - if (!adev->uvd.inst->saved_bo) - return -ENOMEM; + size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo); + ptr = adev->uvd.inst[j].cpu_addr; - memcpy_fromio(adev->uvd.inst->saved_bo, ptr, size); + adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL); + if (!adev->uvd.inst[j].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); + } return 0; } @@ -340,59 +345,65 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev) { unsigned size; void *ptr; + int i; - if (adev->uvd.inst->vcpu_bo == NULL) - return -EINVAL; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + if (adev->uvd.inst[i].vcpu_bo == NULL) + return -EINVAL; - size = amdgpu_bo_size(adev->uvd.inst->vcpu_bo); - ptr = adev->uvd.inst->cpu_addr; + size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo); + ptr = adev->uvd.inst[i].cpu_addr; - if (adev->uvd.inst->saved_bo != NULL) { - memcpy_toio(ptr, adev->uvd.inst->saved_bo, size); - kfree(adev->uvd.inst->saved_bo); - adev->uvd.inst->saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->uvd.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->uvd.inst->cpu_addr, adev->uvd.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + if (adev->uvd.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size); + kfree(adev->uvd.inst[i].saved_bo); + adev->uvd.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->uvd.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } + memset_io(ptr, 0, size); + /* to restore uvd fence seq */ + amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring); } - memset_io(ptr, 0, size); - /* to restore uvd fence seq */ - amdgpu_fence_driver_force_completion(&adev->uvd.inst->ring); } - return 0; } void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) { - struct amdgpu_ring *ring = &adev->uvd.inst->ring; - int i, r; + struct amdgpu_ring *ring; + int i, j, r; - for (i = 0; i < adev->uvd.max_handles; ++i) { - uint32_t handle = atomic_read(&adev->uvd.inst->handles[i]); - if (handle != 0 && adev->uvd.inst->filp[i] == filp) { - struct dma_fence *fence; - - r = amdgpu_uvd_get_destroy_msg(ring, handle, - false, &fence); - if (r) { - DRM_ERROR("Error destroying UVD (%d)!\n", r); - continue; - } + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + ring = &adev->uvd.inst[j].ring; - dma_fence_wait(fence, false); - dma_fence_put(fence); + for (i = 0; i < adev->uvd.max_handles; ++i) { + uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]); + if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) { + struct dma_fence *fence; + + r = amdgpu_uvd_get_destroy_msg(ring, handle, + false, &fence); + if (r) { + DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r); + continue; + } - adev->uvd.inst->filp[i] = NULL; - atomic_set(&adev->uvd.inst->handles[i], 0); + dma_fence_wait(fence, false); + dma_fence_put(fence); + + adev->uvd.inst[j].filp[i] = NULL; + atomic_set(&adev->uvd.inst[j].handles[i], 0); + } } } } @@ -667,15 +678,16 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, void *ptr; long r; int i; + uint32_t ip_instance = ctx->parser->job->ring->me; if (offset & 0x3F) { - DRM_ERROR("UVD messages must be 64 byte aligned!\n"); + DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance); return -EINVAL; } r = amdgpu_bo_kmap(bo, &ptr); if (r) { - DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); + DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r); return r; } @@ -685,7 +697,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, handle = msg[2]; if (handle == 0) { - DRM_ERROR("Invalid UVD handle!\n"); + DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance); return -EINVAL; } @@ -696,18 +708,18 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, /* try to alloc a new handle */ for (i = 0; i < adev->uvd.max_handles; ++i) { - if (atomic_read(&adev->uvd.inst->handles[i]) == handle) { - DRM_ERROR("Handle 0x%x already in use!\n", handle); + if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { + DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle); return -EINVAL; } - if (!atomic_cmpxchg(&adev->uvd.inst->handles[i], 0, handle)) { - adev->uvd.inst->filp[i] = ctx->parser->filp; + if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) { + adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp; return 0; } } - DRM_ERROR("No more free UVD handles!\n"); + DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance); return -ENOSPC; case 1: @@ -719,27 +731,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, /* validate the handle */ for (i = 0; i < adev->uvd.max_handles; ++i) { - if (atomic_read(&adev->uvd.inst->handles[i]) == handle) { - if (adev->uvd.inst->filp[i] != ctx->parser->filp) { - DRM_ERROR("UVD handle collision detected!\n"); + if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) { + if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) { + DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance); return -EINVAL; } return 0; } } - DRM_ERROR("Invalid UVD handle 0x%x!\n", handle); + DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle); return -ENOENT; case 2: /* it's a destroy msg, free the handle */ for (i = 0; i < adev->uvd.max_handles; ++i) - atomic_cmpxchg(&adev->uvd.inst->handles[i], handle, 0); + atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0); amdgpu_bo_kunmap(bo); return 0; default: - DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); + DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type); return -EINVAL; } BUG(); @@ -1043,7 +1055,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, if (r) goto err_free; - r = amdgpu_job_submit(job, ring, &adev->uvd.inst->entity, + r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); if (r) goto err_free; @@ -1189,27 +1201,28 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence; long r; + uint32_t ip_instance = ring->me; r = amdgpu_uvd_get_create_msg(ring, 1, NULL); if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r); goto error; } r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r); goto error; } r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance); r = -ETIMEDOUT; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r); } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx); r = 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 66d4bea5fb2c..08f3b6c84bea 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR); } /** @@ -72,10 +72,10 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->uvd.inst->ring_enc[0]) - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); else - return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); } /** @@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); } /** @@ -106,10 +106,10 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; - if (ring == &adev->uvd.inst->ring_enc[0]) - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); else - return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); } /** @@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } /** @@ -144,11 +144,11 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) return; } - if (ring == &adev->uvd.inst->ring_enc[0]) - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, + if (ring == &adev->uvd.inst[ring->me].ring_enc[0]) + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } @@ -387,19 +387,21 @@ static int uvd_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; struct drm_sched_rq *rq; - int i, r; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst->irq); - if (r) - return r; - - /* UVD ENC TRAP */ - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst->irq); + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + /* UVD TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.inst[j].irq); if (r) return r; + + /* UVD ENC TRAP */ + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.inst[j].irq); + if (r) + return r; + } } r = amdgpu_uvd_sw_init(adev); @@ -416,43 +418,48 @@ static int uvd_v7_0_sw_init(void *handle) DRM_INFO("PSP loading UVD firmware\n"); } - ring = &adev->uvd.inst->ring_enc[0]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; - r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc, - rq, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD ENC run queue.\n"); - return r; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + ring = &adev->uvd.inst[j].ring_enc[0]; + rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL]; + r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc, + rq, NULL); + if (r) { + DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j); + return r; + } } r = amdgpu_uvd_resume(adev); if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - ring = &adev->uvd.inst->ring; - sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); - if (r) - return r; - } - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.inst->ring_enc[i]; - sprintf(ring->name, "uvd_enc%d", i); - if (amdgpu_sriov_vf(adev)) { - ring->use_doorbell = true; - - /* currently only use the first enconding ring for - * sriov, so set unused location for other unused rings. - */ - if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; - else - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + for (j = 0; j < adev->uvd.num_uvd_inst; j++) { + if (!amdgpu_sriov_vf(adev)) { + ring = &adev->uvd.inst[j].ring; + sprintf(ring->name, "uvd<%d>", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + if (r) + return r; + } + + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.inst[j].ring_enc[i]; + sprintf(ring->name, "uvd_enc%d<%d>", i, j); + if (amdgpu_sriov_vf(adev)) { + ring->use_doorbell = true; + + /* currently only use the first enconding ring for + * sriov, so set unused location for other unused rings. + */ + if (i == 0) + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + else + ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + } + r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); + if (r) + return r; } - r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); - if (r) - return r; } r = amdgpu_virt_alloc_mm_table(adev); @@ -464,7 +471,7 @@ static int uvd_v7_0_sw_init(void *handle) static int uvd_v7_0_sw_fini(void *handle) { - int i, r; + int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_virt_free_mm_table(adev); @@ -473,11 +480,12 @@ static int uvd_v7_0_sw_fini(void *handle) if (r) return r; - drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc); - - for (i = 0; i < adev->uvd.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]); + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc); + for (i = 0; i < adev->uvd.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]); + } return amdgpu_uvd_sw_fini(adev); } @@ -491,9 +499,9 @@ static int uvd_v7_0_sw_fini(void *handle) static int uvd_v7_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.inst->ring; + struct amdgpu_ring *ring; uint32_t tmp; - int i, r; + int i, j, r; if (amdgpu_sriov_vf(adev)) r = uvd_v7_0_sriov_start(adev); @@ -502,57 +510,60 @@ static int uvd_v7_0_hw_init(void *handle) if (r) goto done; - if (!amdgpu_sriov_vf(adev)) { - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { + ring = &adev->uvd.inst[j].ring; + + if (!amdgpu_sriov_vf(adev)) { + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + + r = amdgpu_ring_alloc(ring, 10); + if (r) { + DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r); + goto done; + } + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + tmp = PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); + amdgpu_ring_write(ring, tmp); + amdgpu_ring_write(ring, 0xFFFFF); + + /* Clear timeout status bits */ + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_TIMEOUT_STATUS), 0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j, + mmUVD_SEMA_CNTL), 0)); + amdgpu_ring_write(ring, 3); + + amdgpu_ring_commit(ring); } - r = amdgpu_ring_alloc(ring, 10); - if (r) { - DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); - goto done; + for (i = 0; i < adev->uvd.num_enc_rings; ++i) { + ring = &adev->uvd.inst[j].ring_enc[i]; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } } - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0); - amdgpu_ring_write(ring, tmp); - amdgpu_ring_write(ring, 0xFFFFF); - - /* Clear timeout status bits */ - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_TIMEOUT_STATUS), 0)); - amdgpu_ring_write(ring, 0x8); - - amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, - mmUVD_SEMA_CNTL), 0)); - amdgpu_ring_write(ring, 3); - - amdgpu_ring_commit(ring); } - - for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - ring = &adev->uvd.inst->ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } - } - done: if (!r) DRM_INFO("UVD and UVD ENC initialized successfully.\n"); @@ -570,7 +581,7 @@ done: static int uvd_v7_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->uvd.inst->ring; + int i; if (!amdgpu_sriov_vf(adev)) uvd_v7_0_stop(adev); @@ -579,7 +590,8 @@ static int uvd_v7_0_hw_fini(void *handle) DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); } - ring->ready = false; + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) + adev->uvd.inst[i].ring.ready = false; return 0; } @@ -619,48 +631,51 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) { uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev); uint32_t offset; + int i; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - offset = 0; - } else { - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.inst->gpu_addr)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.inst->gpu_addr)); - offset = size; - } + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + offset = 0; + } else { + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr)); + offset = size; + } - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.inst->gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.inst->gpu_addr + offset)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); - - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->uvd.inst->gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, - AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - - WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); + + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, + AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); + + WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); + } } static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, @@ -670,6 +685,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, uint64_t addr = table->gpu_addr; struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr; uint32_t size; + int i; size = header->header_size + header->vce_table_size + header->uvd_table_size; @@ -689,11 +705,12 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, /* 4, set resp to zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); - WDOORBELL32(adev->uvd.inst->ring_enc[0].doorbell_index, 0); - adev->wb.wb[adev->uvd.inst->ring_enc[0].wptr_offs] = 0; - adev->uvd.inst->ring_enc[0].wptr = 0; - adev->uvd.inst->ring_enc[0].wptr_old = 0; - + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { + WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0); + adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0; + adev->uvd.inst[i].ring_enc[0].wptr = 0; + adev->uvd.inst[i].ring_enc[0].wptr_old = 0; + } /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); @@ -726,6 +743,7 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) struct mmsch_v1_0_cmd_end end = { {0} }; uint32_t *init_table = adev->virt.mm_table.cpu_addr; struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table; + uint8_t i = 0; direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE; direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; @@ -743,120 +761,121 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) init_table += header->uvd_table_offset; - ring = &adev->uvd.inst->ring; - ring->wptr = 0; - size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4); - - MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), - 0xFFFFFFFF, 0x00000004); - /* mc resume*/ - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - offset = 0; - } else { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->uvd.inst->gpu_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->uvd.inst->gpu_addr)); - offset = size; + for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { |