From 4a6eccbcb9ea88cf0408115e9d130ce7062ee6fd Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Wed, 10 Apr 2019 12:16:34 +0800 Subject: drm/i915/gvt: Change fb_info->size from pages to bytes fb_info->size is in pages, but some function need bytes when it is as a parameter. Such as: a. intel_gvt_ggtt_validate_range(), according to function definition b. vifio_device_gfx_plane_info->size, according to the comment of its definition So change fb_info->size into bytes. v2: Keep fb_info->size in real size instead of assinging casted page size(zhenyu) v3: obj->size should be page aligned and delete redundant check(zhenyu) Signed-off-by: Xiong Zhang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 4e1e425189ba..c104f041d0f4 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -45,6 +45,7 @@ static int vgpu_gem_get_pages( int i, ret; gen8_pte_t __iomem *gtt_entries; struct intel_vgpu_fb_info *fb_info; + u32 page_num; fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info; if (WARN_ON(!fb_info)) @@ -54,14 +55,15 @@ static int vgpu_gem_get_pages( if (unlikely(!st)) return -ENOMEM; - ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL); + page_num = obj->base.size >> PAGE_SHIFT; + ret = sg_alloc_table(st, page_num, GFP_KERNEL); if (ret) { kfree(st); return ret; } gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + (fb_info->start >> PAGE_SHIFT); - for_each_sg(st->sgl, sg, fb_info->size, i) { + for_each_sg(st->sgl, sg, page_num, i) { sg->offset = 0; sg->length = PAGE_SIZE; sg_dma_address(sg) = @@ -158,7 +160,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, return NULL; drm_gem_private_object_init(dev, &obj->base, - info->size << PAGE_SHIFT); + roundup(info->size, PAGE_SIZE)); i915_gem_object_init(obj, &intel_vgpu_gem_ops); obj->read_domains = I915_GEM_DOMAIN_GTT; @@ -206,7 +208,6 @@ static int vgpu_get_plane_info(struct drm_device *dev, struct intel_vgpu_fb_info *info, int plane_id) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_vgpu_primary_plane_format p; struct intel_vgpu_cursor_plane_format c; int ret, tile_height = 1; @@ -267,8 +268,7 @@ static int vgpu_get_plane_info(struct drm_device *dev, return -EINVAL; } - info->size = (info->stride * roundup(info->height, tile_height) - + PAGE_SIZE - 1) >> PAGE_SHIFT; + info->size = info->stride * roundup(info->height, tile_height); if (info->size == 0) { gvt_vgpu_err("fb size is zero\n"); return -EINVAL; @@ -278,11 +278,6 @@ static int vgpu_get_plane_info(struct drm_device *dev, gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start); return -EFAULT; } - if (((info->start >> PAGE_SHIFT) + info->size) > - ggtt_total_entries(&dev_priv->ggtt)) { - gvt_vgpu_err("Invalid GTT offset or size\n"); - return -EFAULT; - } if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) { gvt_vgpu_err("invalid gma addr\n"); -- cgit v1.2.3 From 0cf8f58d0a340a2ac744f6e0e3402a89780ecf8b Mon Sep 17 00:00:00 2001 From: Aleksei Gimbitskii Date: Tue, 23 Apr 2019 15:04:08 +0300 Subject: drm/i915/gvt: Remove typedef and let the enumeration starts from zero Typedef is not recommended in the Linux kernel.The klocwork static code analyzer takes the enumeration as the full range of intel_gvt_gtt_type_t. But the intel_gvt_gtt_type_t will never be used in full range. For example, the GTT_TYPE_INVALID will never be used as an index of an array. Remove the typedef and let the enumeration starts from zero to pass klocwork analysis. This patch fixed the critial issues #483, #551, #665 reported by klockwork. v3: - Remove the typedef and let the enumeration starts from zero. Signed-off-by: Aleksei Gimbitskii Cc: Zhenyu Wang Cc: Zhi Wang CC: Colin Xu Reviewed-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 12 ++++++------ drivers/gpu/drm/i915/gvt/gtt.h | 16 ++++++++-------- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index c2f7d20f6346..7600416db908 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -811,7 +811,7 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt); /* Allocate shadow page table without guest page. */ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt( - struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) + struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type) { struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev; struct intel_vgpu_ppgtt_spt *spt = NULL; @@ -861,7 +861,7 @@ err_free_spt: /* Allocate shadow page table associated with specific gfn. */ static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn( - struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type, + struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type, unsigned long gfn, bool guest_pde_ips) { struct intel_vgpu_ppgtt_spt *spt; @@ -936,7 +936,7 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu, { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; - intel_gvt_gtt_type_t cur_pt_type; + enum intel_gvt_gtt_type cur_pt_type; GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type))); @@ -1855,7 +1855,7 @@ static void vgpu_free_mm(struct intel_vgpu_mm *mm) * Zero on success, negative error code in pointer if failed. */ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, - intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) + enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) { struct intel_gvt *gvt = vgpu->gvt; struct intel_vgpu_mm *mm; @@ -2309,7 +2309,7 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, } static int alloc_scratch_pages(struct intel_vgpu *vgpu, - intel_gvt_gtt_type_t type) + enum intel_gvt_gtt_type type) { struct intel_vgpu_gtt *gtt = &vgpu->gtt; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; @@ -2594,7 +2594,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, * Zero on success, negative error code if failed. */ struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, - intel_gvt_gtt_type_t root_entry_type, u64 pdps[]) + enum intel_gvt_gtt_type root_entry_type, u64 pdps[]) { struct intel_vgpu_mm *mm; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 32c573aea494..42d0394f0de2 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -95,8 +95,8 @@ struct intel_gvt_gtt { unsigned long scratch_mfn; }; -typedef enum { - GTT_TYPE_INVALID = -1, +enum intel_gvt_gtt_type { + GTT_TYPE_INVALID = 0, GTT_TYPE_GGTT_PTE, @@ -124,7 +124,7 @@ typedef enum { GTT_TYPE_PPGTT_PML4_PT, GTT_TYPE_MAX, -} intel_gvt_gtt_type_t; +}; enum intel_gvt_mm_type { INTEL_GVT_MM_GGTT, @@ -148,7 +148,7 @@ struct intel_vgpu_mm { union { struct { - intel_gvt_gtt_type_t root_entry_type; + enum intel_gvt_gtt_type root_entry_type; /* * The 4 PDPs in ring context. For 48bit addressing, * only PDP0 is valid and point to PML4. For 32it @@ -169,7 +169,7 @@ struct intel_vgpu_mm { }; struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, - intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); + enum intel_gvt_gtt_type root_entry_type, u64 pdps[]); static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm) { @@ -233,7 +233,7 @@ struct intel_vgpu_ppgtt_spt { struct intel_vgpu *vgpu; struct { - intel_gvt_gtt_type_t type; + enum intel_gvt_gtt_type type; bool pde_ips; /* for 64KB PTEs */ void *vaddr; struct page *page; @@ -241,7 +241,7 @@ struct intel_vgpu_ppgtt_spt { } shadow_page; struct { - intel_gvt_gtt_type_t type; + enum intel_gvt_gtt_type type; bool pde_ips; /* for 64KB PTEs */ unsigned long gfn; unsigned long write_cnt; @@ -267,7 +267,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu, - intel_gvt_gtt_type_t root_entry_type, u64 pdps[]); + enum intel_gvt_gtt_type root_entry_type, u64 pdps[]); int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 18f01eeb2510..34129eacfd22 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1206,7 +1206,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) { - intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; + enum intel_gvt_gtt_type root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY; struct intel_vgpu_mm *mm; u64 *pdps; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 8998fa5ab198..7c99bbc3e2b8 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1343,7 +1343,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct intel_vgpu_mm *mm; struct intel_vgpu *vgpu = workload->vgpu; - intel_gvt_gtt_type_t root_entry_type; + enum intel_gvt_gtt_type root_entry_type; u64 pdps[GVT_RING_CTX_NR_PDPS]; switch (desc->addressing_mode) { -- cgit v1.2.3 From d9420241d09bac6ba930d95963bd237ec9629db6 Mon Sep 17 00:00:00 2001 From: Aleksei Gimbitskii Date: Tue, 23 Apr 2019 15:04:09 +0300 Subject: drm/i915/gvt: Do not copy the uninitialized pointer from fb_info In the code the memcpy() function copied uninitialized pointer in fb_info to dmabuf_obj->info. Later the pointer in dmabuf_obj->info will be initialized. To make the code aligned with requirements of the klocwork static code analyzer, the uninitialized pointer should be initialized before memcpy(). v2: - Initialize fb_info.obj in vgpu_get_plane_info(). (Colin Xu) This patch fixed the critical issue #632 reported by klockwork. Signed-off-by: Aleksei Gimbitskii Cc: Zhenyu Wang Cc: Zhi Wang Cc: Colin Xu Reviewed-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/dmabuf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index c104f041d0f4..41c8ebc60c63 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -212,6 +212,8 @@ static int vgpu_get_plane_info(struct drm_device *dev, struct intel_vgpu_cursor_plane_format c; int ret, tile_height = 1; + memset(info, 0, sizeof(*info)); + if (plane_id == DRM_PLANE_TYPE_PRIMARY) { ret = intel_vgpu_decode_primary_plane(vgpu, &p); if (ret) -- cgit v1.2.3 From 4feeea1d8d7713c5838d99c1fdfcc2e90c0f977d Mon Sep 17 00:00:00 2001 From: Aleksei Gimbitskii Date: Tue, 23 Apr 2019 15:04:10 +0300 Subject: drm/i915/gvt: Use snprintf() to prevent possible buffer overflow. For printing the intel_vgpu->id, a buffer with fixed length is allocated on the stack. But if vgpu->id is greater than 6 characters, the buffer overflow will happen. Even the string of the amount of max vgpu is less that the length buffer right now, it's better to replace sprintf() with snprintf(). v2: - Increase the size of the buffer. (Colin Xu) This patch fixed the critical issue #673 reported by klocwork. Signed-off-by: Aleksei Gimbitskii Cc: Zhenyu Wang Cc: Zhi Wang Cc: Colin Xu Reviewed-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 2ec89bcb59f1..8a9606f91e68 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -196,9 +196,9 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) { struct dentry *ent; - char name[10] = ""; + char name[16] = ""; - sprintf(name, "vgpu%d", vgpu->id); + snprintf(name, 16, "vgpu%d", vgpu->id); vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root); if (!vgpu->debugfs) return -ENOMEM; -- cgit v1.2.3 From 930c8dfea4b8f320bd7f6a3e5e721ac20e444e03 Mon Sep 17 00:00:00 2001 From: Aleksei Gimbitskii Date: Tue, 23 Apr 2019 15:04:13 +0300 Subject: drm/i915/gvt: Check if get_next_pt_type() always returns a valid value According to gtt_type_table[] function get_next_pt_type() may returns GTT_TYPE_INVALID in some cases. To prevent driver to try to create memory page with invalid data type, additional check is added. Signed-off-by: Aleksei Gimbitskii Cc: Zhenyu Wang Cc: Zhi Wang Cc: Colin Xu Reviewed-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 7600416db908..08c74e65836b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1076,6 +1076,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry( } else { int type = get_next_pt_type(we->type); + if (!gtt_type_is_pt(type)) + goto err; + spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips); if (IS_ERR(spt)) { ret = PTR_ERR(spt); -- cgit v1.2.3 From 8631fef7f2037281efca685c9e717eaed33ee37c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Thu, 25 Apr 2019 17:04:54 +0800 Subject: drm/i915/gvt: Revert "drm/i915/gvt: Refine the snapshort range of I915 MCHBAR to optimize gvt-g boot time" This reverts commit f74a6d9a2c427b6656bc93eacfa6d329ba54d611. BXT needs to access 0x141000-0x1417ff register to obtain the dram info. But after the snapshot range of I915_MCHBAR is refined in f74a6d9a2c, it only initializes the range of 0x144000-0x147fff for VGPU and then causes that the guest GPU can't get the initialized value for dram detection on BXT. Signed-off-by: Zhao Yakui Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/reg.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 34129eacfd22..90673fca792f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3303,7 +3303,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt) /* Special MMIO blocks. */ static struct gvt_mmio_block mmio_blocks[] = { {D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL}, - {D_ALL, MCHBAR_MIRROR_REG_BASE, 0x4000, NULL, NULL}, + {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL}, {D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE, pvinfo_mmio_read, pvinfo_mmio_write}, {D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL}, diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h index 3de5b643b266..33aaa14bfdde 100644 --- a/drivers/gpu/drm/i915/gvt/reg.h +++ b/drivers/gpu/drm/i915/gvt/reg.h @@ -126,7 +126,4 @@ #define RING_GFX_MODE(base) _MMIO((base) + 0x29c) #define VF_GUARDBAND _MMIO(0x83a4) -/* define the effective range of MCHBAR register on Sandybridge+ */ -#define MCHBAR_MIRROR_REG_BASE _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000) - #endif -- cgit v1.2.3 From 75fdb811d93c8aa4a9f73b63db032b1e6a8668ef Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Fri, 22 Feb 2019 14:13:42 +0800 Subject: drm/i915/gvt: Add in context mmio 0x20D8 to gen9 mmio list Depends on GEN family and I915_PARAM_HAS_CONTEXT_ISOLATION, Mesa driver will decide whether constant buffer 0 address is relative or absolute, and load GPU initial state by lri to context mmio INSTPM (GEN8) or 0x20D8 (>=GEN9). Mesa Commit fa8a764b62 ("i965: Use absolute addressing for constant buffer 0 on Kernel 4.16+.") INSTPM is already added to gen8_engine_mmio_list, but 0x20D8 is missed in gen9_engine_mmio_list. From GVT point of view, different guest could have different context so should switch those mmio accordingly. v2: Update fixes commit ID. Fixes: 178657139307 ("drm/i915/gvt: vGPU context switch") Reviewed-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang (cherry picked from commit 1e8b15a1988ed3c7429402017d589422628cdf47) --- drivers/gpu/drm/i915/gvt/mmio_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index e7e14c842be4..edf6d646eb25 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ {RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ + {RCS0, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */ {RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ {RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ -- cgit v1.2.3 From bdb1ccb080dafc1b4224873a5b759ff85a7d1c10 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 30 Apr 2019 09:47:25 +0800 Subject: drm/amdgpu: remove ATPX_DGPU_REQ_POWER_FOR_DISPLAYS check when hotplug-in In amdgpu_atif_handler, when hotplug event received, remove ATPX_DGPU_REQ_POWER_FOR_DISPLAYS check. This bit's check will cause missing system resume. Signed-off-by: Aaron Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 4376b17ca594..56f8ca2a3bb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -464,8 +464,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, } } if (req.pending & ATIF_DGPU_DISPLAY_EVENT) { - if ((adev->flags & AMD_IS_PX) && - amdgpu_atpx_dgpu_req_power_for_displays()) { + if (adev->flags & AMD_IS_PX) { pm_runtime_get_sync(adev->ddev->dev); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(adev->ddev); -- cgit v1.2.3 From 74dcfe74b4ef6cd8f219c42c4ea4df0310c78b33 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Tue, 30 Apr 2019 22:38:51 +0800 Subject: drm/amdgpu: Rearm IRQ in Vega10 SR-IOV if IRQ lost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Multi-VFs stress test, sometimes we see IRQ lost when running benchmark, just rearm it. Signed-off-by: Trigger Huang Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 37 +++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 1b2f69a9a24e..8d89ab7f0ae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -31,7 +31,7 @@ #include "soc15_common.h" #include "vega10_ih.h" - +#define MAX_REARM_RETRY 10 static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev); @@ -381,6 +381,38 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, ih->rptr += 32; } +/** + * vega10_ih_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * + */ +static void vega10_ih_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t reg_rptr = 0; + uint32_t v = 0; + uint32_t i = 0; + + if (ih == &adev->irq.ih) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); + else if (ih == &adev->irq.ih1) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); + else if (ih == &adev->irq.ih2) + reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); + else + return; + + /* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(reg_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + /** * vega10_ih_set_rptr - set the IH ring buffer rptr * @@ -395,6 +427,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + vega10_ih_irq_rearm(adev, ih); } else if (ih == &adev->irq.ih) { WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); } else if (ih == &adev->irq.ih1) { -- cgit v1.2.3 From 3680624e32412ced54f5659431c35b26e3e83923 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Tue, 30 Apr 2019 22:00:31 +0800 Subject: drm/amdgpu: Fix VM clean check method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit amdgpu_vm_make_compute is used to turn a GFX VM into a compute VM, the prerequisite is this VM is clean. Let's check if some page tables are already filled , while not check if some mapping is already made. Signed-off-by: Trigger Huang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 +++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a07c85815b7a..4f10f5aba00b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2756,6 +2756,37 @@ error_free_sched_entity: return r; } +/** + * amdgpu_vm_check_clean_reserved - check if a VM is clean + * + * @adev: amdgpu_device pointer + * @vm: the VM to check + * + * check all entries of the root PD, if any subsequent PDs are allocated, + * it means there are page table creating and filling, and is no a clean + * VM + * + * Returns: + * 0 if this VM is clean + */ +static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + enum amdgpu_vm_level root = adev->vm_manager.root_level; + unsigned int entries = amdgpu_vm_num_entries(adev, root); + unsigned int i = 0; + + if (!(vm->root.entries)) + return 0; + + for (i = 0; i < entries; i++) { + if (vm->root.entries[i].base.bo) + return -EINVAL; + } + + return 0; +} + /** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * @@ -2786,10 +2817,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns return r; /* Sanity checks */ - if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { - r = -EINVAL; + r = amdgpu_vm_check_clean_reserved(adev, vm); + if (r) goto unreserve_bo; - } if (pasid) { unsigned long flags; -- cgit v1.2.3 From b6818520edb0dc83d8de807cb40dff44995eab86 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Tue, 30 Apr 2019 16:23:29 +0800 Subject: drm/amdgpu: Add IDH_QUERY_ALIVE event for SR-IOV SR-IOV host side will send IDH_QUERY_ALIVE to guest VM to check if this guest VM is still alive (not destroyed). The only thing guest KMD need to do is to send ACK back to host. Signed-off-by: Trigger Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 3 +++ drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 8dbad496b29f..2471e7cf75ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -372,6 +372,9 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, if (amdgpu_sriov_runtime(adev)) schedule_work(&adev->virt.flr_work); break; + case IDH_QUERY_ALIVE: + xgpu_ai_mailbox_send_ack(adev); + break; /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore * it byfar since that polling thread will handle it, * other msg like flr complete is not handled here. diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 39d151b79153..077e91a33d62 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -49,6 +49,7 @@ enum idh_event { IDH_FLR_NOTIFICATION_CMPL, IDH_SUCCESS, IDH_FAIL, + IDH_QUERY_ALIVE, IDH_EVENT_MAX }; -- cgit v1.2.3 From 570c91d51b337053a90ac91710b5fa5d2aacd311 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 2 May 2019 09:14:27 -0400 Subject: drm/amd/display: Use long for signed error code checks in commit planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] The type of 'r' is uint32_t and the return codes for both: - reservation_object_wait_timeout_rcu - amdgpu_bo_reserve ...are signed. While it works for the latter since the check is done on != 0 it doesn't work for the former since we check <= 0. [How] Make 'r' a long in commit planes so we're not doing any unsigned/signed conversion here in the first place. v2: use long instead of int (Christian) Reported-by: Dan Carpenter Signed-off-by: Nicholas Kazlauskas Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1854506e3e8f..995f9df66142 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5242,7 +5242,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_crtc *pcrtc, bool wait_for_vblank) { - uint32_t i, r; + uint32_t i; uint64_t timestamp_ns; struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; @@ -5253,6 +5253,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc)); int planes_count = 0, vpos, hpos; + long r; unsigned long flags; struct amdgpu_bo *abo; uint64_t tiling_flags; -- cgit v1.2.3 From e766fde6511e2be83acbca1d603035e08de23f3b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 May 2019 12:45:36 +0100 Subject: drm/i915: Delay semaphore submission until the start of the signaler Currently we submit the semaphore busywait as soon as the signaler is submitted to HW. However, we may submit the signaler as the tail of a batch of requests, and even not as the first context in the HW list, i.e. the busywait may start spinning far in advance of the signaler even starting. If we wait until the request before the signaler is completed before submitting the busywait, we prevent the busywait from starting too early, if the signaler is not first in submission port. To handle the case where the signaler is at the start of the second (or later) submission port, we will need to delay the execution callback until we know the context is promoted to port0. A challenge for later. Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190501114541.10077-9-chris@chris-wilson.co.uk (cherry picked from commit 0d90ccb70211cbf55140e91bd39db684aa4c16e9) [Joonas: edited Fixes: tag into single line.] Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b836721d3b13..20c7c77b3768 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -798,6 +798,21 @@ err_unreserve: return ERR_PTR(ret); } +static int +i915_request_await_start(struct i915_request *rq, struct i915_request *signal) +{ + if (list_is_first(&signal->ring_link, &signal->ring->request_list)) + return 0; + + signal = list_prev_entry(signal, ring_link); + if (i915_timeline_sync_is_later(rq->timeline, &signal->fence)) + return 0; + + return i915_sw_fence_await_dma_fence(&rq->submit, + &signal->fence, 0, + I915_FENCE_GFP); +} + static int emit_semaphore_wait(struct i915_request *to, struct i915_request *from, @@ -816,6 +831,10 @@ emit_semaphore_wait(struct i915_request *to, &from->fence, 0, I915_FENCE_GFP); + err = i915_request_await_start(to, from); + if (err < 0) + return err; + err = i915_sw_fence_await_dma_fence(&to->semaphore, &from->fence, 0, I915_FENCE_GFP); -- cgit v1.2.3 From 2564fe708b580c1ef12b2b527ab6e8afe11ad444 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 4 May 2019 08:07:07 +0100 Subject: drm/i915: Disable semaphore busywaits on saturated systems Asking the GPU to busywait on a memory address, perhaps not unexpectedly in hindsight for a shared system, leads to bus contention that affects CPU programs trying to concurrently access memory. This can manifest as a drop in transcode throughput on highly over-saturated workloads. The only clue offered by perf, is that the bus-cycles (perf stat -e bus-cycles) jumped by 50% when enabling semaphores. This corresponds with extra CPU active cycles being attributed to intel_idle's mwait. This patch introduces a heuristic to try and detect when more than one client is submitting to the GPU pushing it into an oversaturated state. As we already keep track of when the semaphores are signaled, we can inspect their state on submitting the busywait batch and if we planned to use a semaphore but were too late, conclude that the GPU is overloaded and not try to use semaphores in future requests. In practice, this means we optimistically try to use semaphores for the first frame of a transcode job split over multiple engines, and fail if there are multiple clients active and continue not to use semaphores for the subsequent frames in the sequence. Periodically, we try to optimistically switch semaphores back on whenever the client waits to catch up with the transcode results. With 1 client, on Broxton J3455, with the relative fps normalized by %cpu: x no semaphores + drm-tip * patched +------------------------------------------------------------------------+ | * | | *+ | | **+ | | **+ x | | x * +**+ x | | x x * * +***x xx | | x x * * *+***x *x | | x x* + * * *****x *x x | | + x xx+x* + *** * ********* x * | | + x xx+x* * *** +** ********* xx * | | * + ++++* + x*x****+*+* ***+*************+x* * | |*+ +** *+ + +* + *++****** *xxx**********x***+*****************+*++ *| | |__________A_____M_____| | | |_______________A____M_________| | | |____________A___M________| | +------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 120 2.60475 3.50941 3.31123 3.2143953 0.21117399 + 120 2.3826 3.57077 3.25101 3.1414161 0.28146407 Difference at 95.0% confidence -0.0729792 +/- 0.0629585 -2.27039% +/- 1.95864% (Student's t, pooled s = 0.248814) * 120 2.35536 3.66713 3.2849 3.2059917 0.24618565 No difference proven at 95.0% confidence With 10 clients over-saturating the pipeline: x no semaphores + drm-tip * patched +------------------------------------------------------------------------+ | ++ ** | | ++ ** | | ++ ** | | ++ ** | | ++ xx *** | | ++ xx *** | | ++ xxx*** | | ++ xxx*** | | +++ xxx*** | | +++ xx**** | | +++ xx**** | | +++ xx**** | | +++ xx**** | | ++++ xx**** | | +++++ xx**** | | +++++ x x****** | | ++++++ xxx******* | | ++++++ xxx******* | | ++++++ xxx******* | | ++++++ xx******** | | ++++++ xxxx******** | | ++++++ xxxx******** | | ++++++++ xxxxx********* | |+ + + + ++++++++ xxx*xx**********x* *| | |__A__| | | |__AM__| | | |__A_| | +------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 120 2.47855 2.8972 2.72376 2.7193402 0.074604933 + 120 1.17367 1.77459 1.71977 1.6966782 0.085850697 Difference at 95.0% confidence -1.02266 +/- 0.0203502 -37.607% +/- 0.748352% (Student's t, pooled s = 0.0804246) * 120 2.57868 3.00821 2.80142 2.7923878 0.058646477 Difference at 95.0% confidence 0.0730476 +/- 0.0169791 2.68622% +/- 0.624383% (Student's t, pooled s = 0.0671018) Indicating that we've recovered the regression from enabling semaphores on this saturated setup, with a hint towards an overall improvement. Very similar, but of smaller magnitude, results are observed on both Skylake(gt2) and Kabylake(gt4). This may be due to the reduced impact of bus-cycles, where we see a 50% hit on Broxton, it is only 10% on the big core, in this particular test. One observation to make here is that for a greedy client trying to maximise its own throughput, using semaphores is the right choice. It is only the holistic system-wide view that semaphores of one client impacts another and reduces the overall throughput where we would choose to disable semaphores. The most noticeable negactive impact this has is on the no-op microbenchmarks, which are also very notable for having no cpu bus load. In particular, this increases the runtime and energy consumption of gem_exec_whisper. Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Dmitry Rogozhkin Cc: Dmitry Ermilov Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190504070707.30902-1-chris@chris-wilson.co.uk (cherry picked from commit ca6e56f654e7b241256ffba78cd2abb22aa3bc97) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 40 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_context.c | 1 + drivers/gpu/drm/i915/intel_context_types.h | 3 +++ 3 files changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 20c7c77b3768..ce342f7f7ddb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request) if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); + /* + * Are we using semaphores when the gpu is already saturated? + * + * Using semaphores incurs a cost in having the GPU poll a + * memory location, busywaiting for it to change. The continual + * memory reads can have a noticeable impact on the rest of the + * system with the extra bus traffic, stalling the cpu as it too + * tries to access memory across the bus (perf stat -e bus-cycles). + * + * If we installed a semaphore on this request and we only submit + * the request after the signaler completed, that indicates the + * system is overloaded and using semaphores at this time only + * increases the amount of work we are doing. If so, we disable + * further use of semaphores until we are idle again, whence we + * optimistically try again. + */ + if (request->sched.semaphores && + i915_sw_fence_signaled(&request->semaphore)) + request->hw_context->saturated |= request->sched.semaphores; + /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -813,6 +833,24 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) I915_FENCE_GFP); } +static intel_engine_mask_t +already_busywaiting(struct i915_request *rq) +{ + /* + * Polling a semaphore causes bus traffic, delaying other users of + * both the GPU and CPU. We want to limit the impact on others, + * while taking advantage of early submission to reduce GPU + * latency. Therefore we restrict ourselves to not using more + * than one semaphore from each source, and not using a semaphore + * if we have detected the engine is saturated (i.e. would not be + * submitted early and cause bus traffic reading an already passed + * semaphore). + * + * See the are-we-too-late? check in __i915_request_submit(). + */ + return rq->sched.semaphores | rq->hw_context->saturated; +} + static int emit_semaphore_wait(struct i915_request *to, struct i915_request *from, @@ -826,7 +864,7 @@ emit_semaphore_wait(struct i915_request *to, GEM_BUG_ON(INTEL_GEN(to->i915) < 8); /* Just emit the first semaphore we see as request space is limited. */ - if (to->sched.semaphores & from->engine->mask) + if (already_busywaiting(to) & from->engine->mask) return i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, I915_FENCE_GFP); diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c index 8931e0fee873..924cc556223a 100644 --- a/drivers/gpu/drm/i915/intel_context.c +++ b/drivers/gpu/drm/i915/intel_context.c @@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce, ce->gem_context = ctx; ce->engine = engine; ce->ops = engine->cops; + ce->saturated = 0; INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h index 68b4ca1611e0..339c7437fe82 100644 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -14,6 +14,7 @@ #include #include "i915_active_types.h" +#include "intel_engine_types.h" struct i915_gem_context; struct i915_vma; @@ -58,6 +59,8 @@ struct intel_context { atomic_t pin_count; struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ + intel_engine_mask_t saturated; /* submitting semaphores too late? */ + /** * active_tracker: Active tracker for the external rq activity * on this intel_context object. -- cgit v1.2.3 From 992fbe8ce035d8c3fb2615ac6e8faeaa7c2fa2c3 Mon Sep 17 00:00:00 2001 From: Trigger Huang Date: Mon, 25 Feb 2019 19:19:56 +0800 Subject: drm/amdgpu: Use FW addr returned by PSP for VF MM One Vega10 SR-IOV VF, the FW address returned by PSP should be set into the init table, while not the original BO mc address. otherwise, UVD and VCE IB test will fail under Vega10 SR-IOV reference: commit bfcea5204287 ("drm/amdgpu:change VEGA booting with firmware loaded by PSP") commit aa5873dca463 ("drm/amdgpu: Change VCE booting with firmware loaded by PSP") Signed-off-by: Trigger Huang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 16 ++++++++++------ drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 17 +++++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index dc461df48da0..2191d3d0a219 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -787,10 +787,13 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) 0xFFFFFFFF, 0x00000004); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, + mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0); offset = 0; } else { MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), @@ -798,10 +801,11 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), upper_32_bits(adev->uvd.inst[i].gpu_addr)); offset = size; + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index f3f5938430d4..c0ec27991c22 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -244,13 +244,18 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); + offset = AMDGPU_VCE_FIRMWARE_OFFSET; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; + uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi; + uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low; + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, - mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), - adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); + (tmr_mc_addr >> 40) & 0xff); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); } else { MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), @@ -258,6 +263,9 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), (adev->vce.gpu_addr >> 40) & 0xff); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), + offset & ~0x0f000000); + } MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), @@ -272,10 +280,7 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) mmVCE_LMI_VCPU_CACHE_64BIT_BAR2), (adev->vce.gpu_addr >> 40) & 0xff); - offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V4_0_FW_SIZE; - MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), - offset & ~0x0f000000); MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0; -- cgit v1.2.3 From db8a974f7e6966d73b4e6afa673c5b8bc31a111e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Sun, 5 May 2019 11:00:50 +0800 Subject: drm/amd/powerplay: check for invalid profile_exit setting profile_exit performance level setting is valid only when current mode is in profile mode. Signed-off-by: Evan Quan Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 95144e49c7f9..34471dbaa872 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -342,6 +342,16 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, if (current_level == level) return count; + /* profile_exit setting is valid only when current mode is in profile mode */ + if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) && + (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) { + pr_err("Currently not in any profile mode!\n"); + return -EINVAL; + } + if (is_support_sw_smu(adev)) { mutex_lock(&adev->pm.mutex); if (adev->pm.dpm.thermal_active) { -- cgit v1.2.3 From 2e26ccb119bde03584be53406bbd22e711b0d6e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 6 May 2019 19:57:52 +0200 Subject: drm/radeon: prefer lower reference dividers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of the closest reference divider prefer the lowest, this fixes flickering issues on HP Compaq nx9420. Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=108514 Suggested-by: Paul Dufresne Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index aa898c699101..433df7036f96 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -922,12 +922,12 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div, ref_div_max = max(min(100 / post_div, ref_div_max), 1u); /* get matching reference and feedback divider */ - *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max); + *ref_div = min(max(den/post_div, 1u), ref_div_max); *fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den); /* limit fb divider to its maximum */ if (*fb_div > fb_div_max) { - *ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div); + *ref_div = (*ref_div * fb_div_max)/(*fb_div); *fb_div = fb_div_max; } } -- cgit v1.2.3 From 9d6fea5744d6798353f37ac42a8a653a2607ca69 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 May 2019 21:45:06 -0500 Subject: drm/amdgpu/psp: move psp version specific function pointers to early_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we need to use them for GPU reset prior initializing the asic. Fixes a crash if the driver attempts to reset the GPU at driver load time. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 905cce1814f3..05897b05766b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -38,18 +38,10 @@ static void psp_set_funcs(struct amdgpu_device *adev); static int psp_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; psp_set_funcs(adev); - return 0; -} - -static int psp_sw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct psp_context *psp = &adev->psp; - int ret; - switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: @@ -67,6 +59,15 @@ static int psp_sw_init(void *handle) psp->adev = adev; + return 0; +} + +static int psp_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct psp_context *psp = &adev->psp; + int ret; + ret = psp_init_microcode(psp); if (ret) { DRM_ERROR("Failed to load psp firmware!\n"); -- cgit v1.2.3 From 396dd8143bdd94bd1c358a228a631c8c895a1126 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 23 Apr 2019 17:28:10 +0800 Subject: drm/i915/fbc: disable framebuffer compression on GeminiLake On many (all?) the Gemini Lake systems we work with, there is frequent momentary graphical corruption at the top of the screen, and it seems that disabling framebuffer compression can avoid this. The ticket was reported 6 months ago and has already affected a multitude of users, without any real progress being made. So, lets disable framebuffer compression on GeminiLake until a solution is found. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108085 Fixes: fd7d6c5c8f3e ("drm/i915: enable FBC on gen9+ too") Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Jani Nikula Cc: # v4.11+ Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Drake Signed-off-by: Jian-Hong Pan Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190423092810.28359-1-jian-hong@endlessm.com (cherry picked from commit 1d25724b41fad7eeb2c3058a5c8190d6ece73e08) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_fbc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index c805a0966395..5679f2fffb7c 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1280,6 +1280,10 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (!HAS_FBC(dev_priv)) return 0; + /* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */ + if (IS_GEMINILAKE(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) return 1; -- cgit v1.2.3 From da471250706e2f103a1627d1d279c9de44325993 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 25 Apr 2019 19:29:05 +0300 Subject: drm/i915: Fix fastset vs. pfit on/off on HSW EDP transcoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HSW the pipe A panel fitter lives inside the display power well, and the input MUX for the EDP transcoder needs to be configured appropriately to route the data through the power well as needed. Changing the MUX setting is not allowed while the pipe is active, so we need to force a full modeset whenever we need to change it. Currently we may end up doing a fastset which won't change the MUX settings, but it will drop the power well reference, and that kills the pipe. Cc: stable@vger.kernel.org Cc: Hans de Goede Cc: Maarten Lankhorst Fixes: d19f958db23c ("drm/i915: Enable fastset for non-boot modesets.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190425162906.5242-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst (cherry picked from commit 13b7648b7eab7e8259a2fb267b498bd9eba81ca0) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_display.c | 9 +++++++++ drivers/gpu/drm/i915/intel_pipe_crc.c | 13 ++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3bd40a4a6739..5098228f1302 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12082,6 +12082,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, struct intel_crtc_state *pipe_config, bool adjust) { + struct intel_crtc *crtc = to_intel_crtc(current_config->base.crtc); bool ret = true; bool fixup_inherited = adjust && (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && @@ -12303,6 +12304,14 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(gmch_pfit.pgm_ratios); PIPE_CONF_CHECK_X(gmch_pfit.lvds_border_bits); + /* + * Changing the EDP transcoder input mux + * (A_ONOFF vs. A_ON) requires a full modeset. + */ + if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A && + current_config->cpu_transcoder == TRANSCODER_EDP) + PIPE_CONF_CHECK_BOOL(pch_pfit.enabled); + if (!adjust) { PIPE_CONF_CHECK_I(pipe_src_w); PIPE_CONF_CHECK_I(pipe_src_h); diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index e94b5b1bc1b7..e7c7be4911c1 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -311,10 +311,17 @@ retry: pipe_config->base.mode_changed = pipe_config->has_psr; pipe_config->crc_enabled = enable; - if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A) { + if (IS_HASWELL(dev_priv) && + pipe_config->base.active && crtc->pipe == PIPE_A && + pipe_config->cpu_transcoder == TRANSCODER_EDP) { + bool old_need_power_well = pipe_config->pch_pfit.enabled || + pipe_config->pch_pfit.force_thru; + bool new_need_power_well = pipe_config->pch_pfit.enabled || + enable; + pipe_config->pch_pfit.force_thru = enable; - if (pipe_config->cpu_transcoder == TRANSCODER_EDP && - pipe_config->pch_pfit.enabled != enable) + + if (old_need_power_well != new_need_power_well) pipe_config->base.connectors_changed = true; } -- cgit v1.2.3 From c36beba6b296b3c05a0f29753b04775e5ae23886 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 May 2019 12:24:52 +0100 Subject: drm/i915: Seal races between async GPU cancellation, retirement and signaling Currently there is an underlying assumption that i915_request_unsubmit() is synchronous wrt the GPU -- that is the request is no longer in flight as we remove it. In the near future that may change, and this may upset our signaling as we can process an interrupt for that request while it is no longer in flight. CPU0 CPU1 intel_engine_breadcrumbs_irq (queue request completion) i915_request_cancel_signaling ... ... i915_request_enable_signaling dma_fence_signal Hence in the time it took us to drop the lock to signal the request, a preemption event may have occurred and re-queued the request. In the process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and so reused the rq->signal_link that was in use on CPU0, leading to bad pointer chasing in intel_engine_breadcrumbs_irq. A related issue was that if someone started listening for a signal on a completed but no longer in-flight request, we missed the opportunity to immediately signal that request. Furthermore, as intel_contexts may be immediately released during request retirement, in order to be entirely sure that intel_engine_breadcrumbs_irq may no longer dereference the intel_context (ce->signals and ce->signal_link), we must wait for irq spinlock. In order to prevent the race, we use a bit in the fence.flags to signal the transfer onto the signal list inside intel_engine_breadcrumbs_irq. For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then quickly signals to any outside observer that the fence is indeed signaled. v2: Sketch out potential dma-fence API for manual signaling v3: And the test_and_set_bit() Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190508112452.18942-1-chris@chris-wilson.co.uk (cherry picked from commit 0152b3b3f49b36b0f1a1bf9f0353dc636f41d8f0) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 78 +++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_guc_submission.c | 1 - 3 files changed, 58 insertions(+), 22 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce342f7f7ddb..f6c78c0fa74b 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -452,6 +452,7 @@ void __i915_request_submit(struct i915_request *request) set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && + !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) intel_engine_queue_breadcrumbs(engine); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 3cbffd400b1b..832cb6b1e9bd 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -23,6 +23,7 @@ */ #include +#include #include #include "i915_drv.h" @@ -80,9 +81,39 @@ static inline bool __request_completed(const struct i915_request *rq) return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); } +static bool +__dma_fence_signal(struct dma_fence *fence) +{ + return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags); +} + +static void +__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp) +{ + fence->timestamp = timestamp; + set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); + trace_dma_fence_signaled(fence); +} + +static void +__dma_fence_signal__notify(struct dma_fence *fence) +{ + struct dma_fence_cb *cur, *tmp; + + lockdep_assert_held(fence->lock); + lockdep_assert_irqs_disabled(); + + list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) { + INIT_LIST_HEAD(&cur->node); + cur->func(fence, cur); + } + INIT_LIST_HEAD(&fence->cb_list); +} + void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + const ktime_t timestamp = ktime_get(); struct intel_context *ce, *cn; struct list_head *pos, *next; LIST_HEAD(signal); @@ -104,6 +135,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + if (!__dma_fence_signal(&rq->fence)) + continue; /* * Queue for execution after dropping the signaling @@ -111,14 +146,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) * more signalers to the same context or engine. */ i915_request_get(rq); - - /* - * We may race with direct invocation of - * dma_fence_signal(), e.g. i915_request_retire(), - * so we need to acquire our reference to the request - * before we cancel the breadcrumb. - */ - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); list_add_tail(&rq->signal_link, &signal); } @@ -141,7 +168,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine) struct i915_request *rq = list_entry(pos, typeof(*rq), signal_link); - dma_fence_signal(&rq->fence); + __dma_fence_signal__timestamp(&rq->fence, timestamp); + + spin_lock(&rq->lock); + __dma_fence_signal__notify(&rq->fence); + spin_unlock(&rq->lock); + i915_request_put(rq); } } @@ -243,19 +275,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine