From 4a6eccbcb9ea88cf0408115e9d130ce7062ee6fd Mon Sep 17 00:00:00 2001
From: Xiong Zhang <xiong.y.zhang@intel.com>
Date: Wed, 10 Apr 2019 12:16:34 +0800
Subject: drm/i915/gvt: Change fb_info->size from pages to bytes

fb_info->size is in pages, but some function need bytes when it
is as a parameter. Such as:
a. intel_gvt_ggtt_validate_range(), according to function definition
b. vifio_device_gfx_plane_info->size, according to the comment of
   its definition

So change fb_info->size into bytes.

v2: Keep fb_info->size in real size instead of assinging casted page
    size(zhenyu)
v3: obj->size should be page aligned and delete redundant check(zhenyu)

Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/dmabuf.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 4e1e425189ba..c104f041d0f4 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -45,6 +45,7 @@ static int vgpu_gem_get_pages(
 	int i, ret;
 	gen8_pte_t __iomem *gtt_entries;
 	struct intel_vgpu_fb_info *fb_info;
+	u32 page_num;
 
 	fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info;
 	if (WARN_ON(!fb_info))
@@ -54,14 +55,15 @@ static int vgpu_gem_get_pages(
 	if (unlikely(!st))
 		return -ENOMEM;
 
-	ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL);
+	page_num = obj->base.size >> PAGE_SHIFT;
+	ret = sg_alloc_table(st, page_num, GFP_KERNEL);
 	if (ret) {
 		kfree(st);
 		return ret;
 	}
 	gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
 		(fb_info->start >> PAGE_SHIFT);
-	for_each_sg(st->sgl, sg, fb_info->size, i) {
+	for_each_sg(st->sgl, sg, page_num, i) {
 		sg->offset = 0;
 		sg->length = PAGE_SIZE;
 		sg_dma_address(sg) =
@@ -158,7 +160,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 		return NULL;
 
 	drm_gem_private_object_init(dev, &obj->base,
-		info->size << PAGE_SHIFT);
+		roundup(info->size, PAGE_SIZE));
 	i915_gem_object_init(obj, &intel_vgpu_gem_ops);
 
 	obj->read_domains = I915_GEM_DOMAIN_GTT;
@@ -206,7 +208,6 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		struct intel_vgpu_fb_info *info,
 		int plane_id)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_vgpu_primary_plane_format p;
 	struct intel_vgpu_cursor_plane_format c;
 	int ret, tile_height = 1;
@@ -267,8 +268,7 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	info->size = (info->stride * roundup(info->height, tile_height)
-		      + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	info->size = info->stride * roundup(info->height, tile_height);
 	if (info->size == 0) {
 		gvt_vgpu_err("fb size is zero\n");
 		return -EINVAL;
@@ -278,11 +278,6 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start);
 		return -EFAULT;
 	}
-	if (((info->start >> PAGE_SHIFT) + info->size) >
-		ggtt_total_entries(&dev_priv->ggtt)) {
-		gvt_vgpu_err("Invalid GTT offset or size\n");
-		return -EFAULT;
-	}
 
 	if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) {
 		gvt_vgpu_err("invalid gma addr\n");
-- 
cgit v1.2.3


From 0cf8f58d0a340a2ac744f6e0e3402a89780ecf8b Mon Sep 17 00:00:00 2001
From: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Date: Tue, 23 Apr 2019 15:04:08 +0300
Subject: drm/i915/gvt: Remove typedef and let the enumeration starts from zero

Typedef is not recommended in the Linux kernel.The klocwork static code
analyzer takes the enumeration as the full range of intel_gvt_gtt_type_t.
But the intel_gvt_gtt_type_t will never be used in full range. For
example, the GTT_TYPE_INVALID will never be used as an index of an array.
Remove the typedef and let the enumeration starts from zero to pass
klocwork analysis.

This patch fixed the critial issues #483, #551, #665 reported by
klockwork.

v3:
- Remove the typedef and let the enumeration starts from zero.

Signed-off-by: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
CC: Colin Xu <colin.xu@intel.com>
Reviewed-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c       | 12 ++++++------
 drivers/gpu/drm/i915/gvt/gtt.h       | 16 ++++++++--------
 drivers/gpu/drm/i915/gvt/handlers.c  |  2 +-
 drivers/gpu/drm/i915/gvt/scheduler.c |  2 +-
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c2f7d20f6346..7600416db908 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -811,7 +811,7 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
 
 /* Allocate shadow page table without guest page. */
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
-		struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type)
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
 {
 	struct device *kdev = &vgpu->gvt->dev_priv->drm.pdev->dev;
 	struct intel_vgpu_ppgtt_spt *spt = NULL;
@@ -861,7 +861,7 @@ err_free_spt:
 
 /* Allocate shadow page table associated with specific gfn. */
 static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
-		struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type,
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
 		unsigned long gfn, bool guest_pde_ips)
 {
 	struct intel_vgpu_ppgtt_spt *spt;
@@ -936,7 +936,7 @@ static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
 {
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	struct intel_vgpu_ppgtt_spt *s;
-	intel_gvt_gtt_type_t cur_pt_type;
+	enum intel_gvt_gtt_type cur_pt_type;
 
 	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
 
@@ -1855,7 +1855,7 @@ static void vgpu_free_mm(struct intel_vgpu_mm *mm)
  * Zero on success, negative error code in pointer if failed.
  */
 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct intel_vgpu_mm *mm;
@@ -2309,7 +2309,7 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
 }
 
 static int alloc_scratch_pages(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t type)
+		enum intel_gvt_gtt_type type)
 {
 	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
@@ -2594,7 +2594,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
  * Zero on success, negative error code if failed.
  */
 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[])
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
 {
 	struct intel_vgpu_mm *mm;
 
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 32c573aea494..42d0394f0de2 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -95,8 +95,8 @@ struct intel_gvt_gtt {
 	unsigned long scratch_mfn;
 };
 
-typedef enum {
-	GTT_TYPE_INVALID = -1,
+enum intel_gvt_gtt_type {
+	GTT_TYPE_INVALID = 0,
 
 	GTT_TYPE_GGTT_PTE,
 
@@ -124,7 +124,7 @@ typedef enum {
 	GTT_TYPE_PPGTT_PML4_PT,
 
 	GTT_TYPE_MAX,
-} intel_gvt_gtt_type_t;
+};
 
 enum intel_gvt_mm_type {
 	INTEL_GVT_MM_GGTT,
@@ -148,7 +148,7 @@ struct intel_vgpu_mm {
 
 	union {
 		struct {
-			intel_gvt_gtt_type_t root_entry_type;
+			enum intel_gvt_gtt_type root_entry_type;
 			/*
 			 * The 4 PDPs in ring context. For 48bit addressing,
 			 * only PDP0 is valid and point to PML4. For 32it
@@ -169,7 +169,7 @@ struct intel_vgpu_mm {
 };
 
 struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
 
 static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
 {
@@ -233,7 +233,7 @@ struct intel_vgpu_ppgtt_spt {
 	struct intel_vgpu *vgpu;
 
 	struct {
-		intel_gvt_gtt_type_t type;
+		enum intel_gvt_gtt_type type;
 		bool pde_ips; /* for 64KB PTEs */
 		void *vaddr;
 		struct page *page;
@@ -241,7 +241,7 @@ struct intel_vgpu_ppgtt_spt {
 	} shadow_page;
 
 	struct {
-		intel_gvt_gtt_type_t type;
+		enum intel_gvt_gtt_type type;
 		bool pde_ips; /* for 64KB PTEs */
 		unsigned long gfn;
 		unsigned long write_cnt;
@@ -267,7 +267,7 @@ struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
 		u64 pdps[]);
 
 struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
-		intel_gvt_gtt_type_t root_entry_type, u64 pdps[]);
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
 
 int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
 
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 18f01eeb2510..34129eacfd22 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1206,7 +1206,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
 
 static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
 {
-	intel_gvt_gtt_type_t root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+	enum intel_gvt_gtt_type root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
 	struct intel_vgpu_mm *mm;
 	u64 *pdps;
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 8998fa5ab198..7c99bbc3e2b8 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1343,7 +1343,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
 	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
 	struct intel_vgpu_mm *mm;
 	struct intel_vgpu *vgpu = workload->vgpu;
-	intel_gvt_gtt_type_t root_entry_type;
+	enum intel_gvt_gtt_type root_entry_type;
 	u64 pdps[GVT_RING_CTX_NR_PDPS];
 
 	switch (desc->addressing_mode) {
-- 
cgit v1.2.3


From d9420241d09bac6ba930d95963bd237ec9629db6 Mon Sep 17 00:00:00 2001
From: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Date: Tue, 23 Apr 2019 15:04:09 +0300
Subject: drm/i915/gvt: Do not copy the uninitialized pointer from fb_info

In the code the memcpy() function copied uninitialized pointer in fb_info
to dmabuf_obj->info. Later the pointer in dmabuf_obj->info will be
initialized. To make the code aligned with requirements of the klocwork
static code analyzer, the uninitialized pointer should be initialized
before memcpy().

v2:
- Initialize fb_info.obj in vgpu_get_plane_info(). (Colin Xu)

This patch fixed the critical issue #632 reported by klockwork.

Signed-off-by: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Colin Xu <colin.xu@intel.com>
Reviewed-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/dmabuf.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index c104f041d0f4..41c8ebc60c63 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -212,6 +212,8 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 	struct intel_vgpu_cursor_plane_format c;
 	int ret, tile_height = 1;
 
+	memset(info, 0, sizeof(*info));
+
 	if (plane_id == DRM_PLANE_TYPE_PRIMARY) {
 		ret = intel_vgpu_decode_primary_plane(vgpu, &p);
 		if (ret)
-- 
cgit v1.2.3


From 4feeea1d8d7713c5838d99c1fdfcc2e90c0f977d Mon Sep 17 00:00:00 2001
From: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Date: Tue, 23 Apr 2019 15:04:10 +0300
Subject: drm/i915/gvt: Use snprintf() to prevent possible buffer overflow.

For printing the intel_vgpu->id, a buffer with fixed length is allocated
on the stack. But if vgpu->id is greater than 6 characters, the buffer
overflow will happen. Even the string of the amount of max vgpu is less
that the length buffer right now, it's better to replace sprintf() with
snprintf().

v2:
- Increase the size of the buffer. (Colin Xu)

This patch fixed the critical issue #673 reported by klocwork.

Signed-off-by: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Colin Xu <colin.xu@intel.com>
Reviewed-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 2ec89bcb59f1..8a9606f91e68 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -196,9 +196,9 @@ DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
 int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
 {
 	struct dentry *ent;
-	char name[10] = "";
+	char name[16] = "";
 
-	sprintf(name, "vgpu%d", vgpu->id);
+	snprintf(name, 16, "vgpu%d", vgpu->id);
 	vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root);
 	if (!vgpu->debugfs)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 930c8dfea4b8f320bd7f6a3e5e721ac20e444e03 Mon Sep 17 00:00:00 2001
From: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Date: Tue, 23 Apr 2019 15:04:13 +0300
Subject: drm/i915/gvt: Check if get_next_pt_type() always returns a valid
 value

According to gtt_type_table[] function get_next_pt_type() may returns
GTT_TYPE_INVALID in some cases. To prevent driver to try to create memory
page with invalid data type, additional check is added.

Signed-off-by: Aleksei Gimbitskii <aleksei.gimbitskii@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Colin Xu <colin.xu@intel.com>
Reviewed-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 7600416db908..08c74e65836b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1076,6 +1076,9 @@ static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
 	} else {
 		int type = get_next_pt_type(we->type);
 
+		if (!gtt_type_is_pt(type))
+			goto err;
+
 		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
 		if (IS_ERR(spt)) {
 			ret = PTR_ERR(spt);
-- 
cgit v1.2.3


From 8631fef7f2037281efca685c9e717eaed33ee37c Mon Sep 17 00:00:00 2001
From: Zhao Yakui <yakui.zhao@intel.com>
Date: Thu, 25 Apr 2019 17:04:54 +0800
Subject: drm/i915/gvt: Revert "drm/i915/gvt: Refine the snapshort range of
 I915 MCHBAR to optimize gvt-g boot time"

This reverts commit f74a6d9a2c427b6656bc93eacfa6d329ba54d611.

BXT needs to access 0x141000-0x1417ff register to obtain the dram info.
But after the snapshot range of I915_MCHBAR is refined in f74a6d9a2c,
it only initializes the range of 0x144000-0x147fff for VGPU and then
causes that the guest GPU can't get the initialized value for dram
detection on BXT.

Signed-off-by: Zhao Yakui <yakui.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 2 +-
 drivers/gpu/drm/i915/gvt/reg.h      | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 34129eacfd22..90673fca792f 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -3303,7 +3303,7 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
 /* Special MMIO blocks. */
 static struct gvt_mmio_block mmio_blocks[] = {
 	{D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL},
-	{D_ALL, MCHBAR_MIRROR_REG_BASE, 0x4000, NULL, NULL},
+	{D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL},
 	{D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE,
 		pvinfo_mmio_read, pvinfo_mmio_write},
 	{D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL},
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
index 3de5b643b266..33aaa14bfdde 100644
--- a/drivers/gpu/drm/i915/gvt/reg.h
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -126,7 +126,4 @@
 #define RING_GFX_MODE(base)	_MMIO((base) + 0x29c)
 #define VF_GUARDBAND		_MMIO(0x83a4)
 
-/* define the effective range of MCHBAR register on Sandybridge+ */
-#define MCHBAR_MIRROR_REG_BASE	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
-
 #endif
-- 
cgit v1.2.3


From 75fdb811d93c8aa4a9f73b63db032b1e6a8668ef Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Fri, 22 Feb 2019 14:13:42 +0800
Subject: drm/i915/gvt: Add in context mmio 0x20D8 to gen9 mmio list

Depends on GEN family and I915_PARAM_HAS_CONTEXT_ISOLATION, Mesa driver
will decide whether constant buffer 0 address is relative or absolute,
and load GPU initial state by lri to context mmio INSTPM (GEN8)
or 0x20D8 (>=GEN9).
Mesa Commit fa8a764b62
("i965: Use absolute addressing for constant buffer 0 on Kernel 4.16+.")

INSTPM is already added to gen8_engine_mmio_list, but 0x20D8 is missed
in gen9_engine_mmio_list. From GVT point of view, different guest could
have different context so should switch those mmio accordingly.

v2: Update fixes commit ID.

Fixes: 178657139307 ("drm/i915/gvt: vGPU context switch")
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Colin Xu <colin.xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
(cherry picked from commit 1e8b15a1988ed3c7429402017d589422628cdf47)
---
 drivers/gpu/drm/i915/gvt/mmio_context.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e7e14c842be4..edf6d646eb25 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 
 	{RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
 	{RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
+	{RCS0, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */
 
 	{RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
 	{RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
-- 
cgit v1.2.3


From bdb1ccb080dafc1b4224873a5b759ff85a7d1c10 Mon Sep 17 00:00:00 2001
From: Aaron Liu <aaron.liu@amd.com>
Date: Tue, 30 Apr 2019 09:47:25 +0800
Subject: drm/amdgpu: remove ATPX_DGPU_REQ_POWER_FOR_DISPLAYS check when
 hotplug-in

In amdgpu_atif_handler, when hotplug event received, remove
ATPX_DGPU_REQ_POWER_FOR_DISPLAYS check. This bit's check will cause missing
system resume.

Signed-off-by: Aaron Liu <aaron.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 4376b17ca594..56f8ca2a3bb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -464,8 +464,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 			}
 		}
 		if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
-			if ((adev->flags & AMD_IS_PX) &&
-			    amdgpu_atpx_dgpu_req_power_for_displays()) {
+			if (adev->flags & AMD_IS_PX) {
 				pm_runtime_get_sync(adev->ddev->dev);
 				/* Just fire off a uevent and let userspace tell us what to do */
 				drm_helper_hpd_irq_event(adev->ddev);
-- 
cgit v1.2.3


From 74dcfe74b4ef6cd8f219c42c4ea4df0310c78b33 Mon Sep 17 00:00:00 2001
From: Trigger Huang <Trigger.Huang@amd.com>
Date: Tue, 30 Apr 2019 22:38:51 +0800
Subject: drm/amdgpu: Rearm IRQ in Vega10 SR-IOV if IRQ lost
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Multi-VFs stress test, sometimes we see IRQ lost when running
benchmark, just rearm it.

Signed-off-by: Trigger Huang <Trigger.Huang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 37 +++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 1b2f69a9a24e..8d89ab7f0ae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -31,7 +31,7 @@
 #include "soc15_common.h"
 #include "vega10_ih.h"
 
-
+#define MAX_REARM_RETRY 10
 
 static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev);
 
@@ -381,6 +381,38 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
 	ih->rptr += 32;
 }
 
+/**
+ * vega10_ih_irq_rearm - rearm IRQ if lost
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static void vega10_ih_irq_rearm(struct amdgpu_device *adev,
+			       struct amdgpu_ih_ring *ih)
+{
+	uint32_t reg_rptr = 0;
+	uint32_t v = 0;
+	uint32_t i = 0;
+
+	if (ih == &adev->irq.ih)
+		reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR);
+	else if (ih == &adev->irq.ih1)
+		reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1);
+	else if (ih == &adev->irq.ih2)
+		reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2);
+	else
+		return;
+
+	/* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */
+	for (i = 0; i < MAX_REARM_RETRY; i++) {
+		v = RREG32_NO_KIQ(reg_rptr);
+		if ((v < ih->ring_size) && (v != ih->rptr))
+			WDOORBELL32(ih->doorbell_index, ih->rptr);
+		else
+			break;
+	}
+}
+
 /**
  * vega10_ih_set_rptr - set the IH ring buffer rptr
  *
@@ -395,6 +427,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev,
 		/* XXX check if swapping is necessary on BE */
 		*ih->rptr_cpu = ih->rptr;
 		WDOORBELL32(ih->doorbell_index, ih->rptr);
+
+		if (amdgpu_sriov_vf(adev))
+			vega10_ih_irq_rearm(adev, ih);
 	} else if (ih == &adev->irq.ih) {
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr);
 	} else if (ih == &adev->irq.ih1) {
-- 
cgit v1.2.3


From 3680624e32412ced54f5659431c35b26e3e83923 Mon Sep 17 00:00:00 2001
From: Trigger Huang <Trigger.Huang@amd.com>
Date: Tue, 30 Apr 2019 22:00:31 +0800
Subject: drm/amdgpu: Fix VM clean check method
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

amdgpu_vm_make_compute is used to turn a GFX VM into a compute VM,
the prerequisite is this VM is clean. Let's check if some page tables
are already filled , while not check if some mapping is already made.

Signed-off-by: Trigger Huang <Trigger.Huang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 36 +++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a07c85815b7a..4f10f5aba00b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2756,6 +2756,37 @@ error_free_sched_entity:
 	return r;
 }
 
+/**
+ * amdgpu_vm_check_clean_reserved - check if a VM is clean
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: the VM to check
+ *
+ * check all entries of the root PD, if any subsequent PDs are allocated,
+ * it means there are page table creating and filling, and is no a clean
+ * VM
+ *
+ * Returns:
+ *	0 if this VM is clean
+ */
+static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
+	struct amdgpu_vm *vm)
+{
+	enum amdgpu_vm_level root = adev->vm_manager.root_level;
+	unsigned int entries = amdgpu_vm_num_entries(adev, root);
+	unsigned int i = 0;
+
+	if (!(vm->root.entries))
+		return 0;
+
+	for (i = 0; i < entries; i++) {
+		if (vm->root.entries[i].base.bo)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
  *
@@ -2786,10 +2817,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
 		return r;
 
 	/* Sanity checks */
-	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
-		r = -EINVAL;
+	r = amdgpu_vm_check_clean_reserved(adev, vm);
+	if (r)
 		goto unreserve_bo;
-	}
 
 	if (pasid) {
 		unsigned long flags;
-- 
cgit v1.2.3


From b6818520edb0dc83d8de807cb40dff44995eab86 Mon Sep 17 00:00:00 2001
From: Trigger Huang <Trigger.Huang@amd.com>
Date: Tue, 30 Apr 2019 16:23:29 +0800
Subject: drm/amdgpu: Add IDH_QUERY_ALIVE event for SR-IOV

SR-IOV host side will send IDH_QUERY_ALIVE to guest VM to check
if this guest VM is still alive (not destroyed). The only thing
guest KMD need to do is to send ACK back to host.

Signed-off-by: Trigger Huang <Trigger.Huang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 3 +++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 8dbad496b29f..2471e7cf75ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -372,6 +372,9 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
 		if (amdgpu_sriov_runtime(adev))
 			schedule_work(&adev->virt.flr_work);
 		break;
+		case IDH_QUERY_ALIVE:
+			xgpu_ai_mailbox_send_ack(adev);
+			break;
 		/* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore
 		 * it byfar since that polling thread will handle it,
 		 * other msg like flr complete is not handled here.
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 39d151b79153..077e91a33d62 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -49,6 +49,7 @@ enum idh_event {
 	IDH_FLR_NOTIFICATION_CMPL,
 	IDH_SUCCESS,
 	IDH_FAIL,
+	IDH_QUERY_ALIVE,
 	IDH_EVENT_MAX
 };
 
-- 
cgit v1.2.3


From 570c91d51b337053a90ac91710b5fa5d2aacd311 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Thu, 2 May 2019 09:14:27 -0400
Subject: drm/amd/display: Use long for signed error code checks in commit
 planes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Why]

The type of 'r' is uint32_t and the return codes for both:

- reservation_object_wait_timeout_rcu
- amdgpu_bo_reserve

...are signed. While it works for the latter since the check is
done on != 0 it doesn't work for the former since we check <= 0.

[How]

Make 'r' a long in commit planes so we're not doing any unsigned/signed
conversion here in the first place.

v2: use long instead of int (Christian)

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 1854506e3e8f..995f9df66142 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5242,7 +5242,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				    struct drm_crtc *pcrtc,
 				    bool wait_for_vblank)
 {
-	uint32_t i, r;
+	uint32_t i;
 	uint64_t timestamp_ns;
 	struct drm_plane *plane;
 	struct drm_plane_state *old_plane_state, *new_plane_state;
@@ -5253,6 +5253,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 	struct dm_crtc_state *dm_old_crtc_state =
 			to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc));
 	int planes_count = 0, vpos, hpos;
+	long r;
 	unsigned long flags;
 	struct amdgpu_bo *abo;
 	uint64_t tiling_flags;
-- 
cgit v1.2.3


From e766fde6511e2be83acbca1d603035e08de23f3b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 1 May 2019 12:45:36 +0100
Subject: drm/i915: Delay semaphore submission until the start of the signaler

Currently we submit the semaphore busywait as soon as the signaler is
submitted to HW. However, we may submit the signaler as the tail of a
batch of requests, and even not as the first context in the HW list,
i.e. the busywait may start spinning far in advance of the signaler even
starting.

If we wait until the request before the signaler is completed before
submitting the busywait, we prevent the busywait from starting too
early, if the signaler is not first in submission port.

To handle the case where the signaler is at the start of the second (or
later) submission port, we will need to delay the execution callback
until we know the context is promoted to port0. A challenge for later.

Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190501114541.10077-9-chris@chris-wilson.co.uk
(cherry picked from commit 0d90ccb70211cbf55140e91bd39db684aa4c16e9)
[Joonas: edited Fixes: tag into single line.]
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..20c7c77b3768 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -798,6 +798,21 @@ err_unreserve:
 	return ERR_PTR(ret);
 }
 
+static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+	if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+		return 0;
+
+	signal = list_prev_entry(signal, ring_link);
+	if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+		return 0;
+
+	return i915_sw_fence_await_dma_fence(&rq->submit,
+					     &signal->fence, 0,
+					     I915_FENCE_GFP);
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -816,6 +831,10 @@ emit_semaphore_wait(struct i915_request *to,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
 
+	err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
 	err = i915_sw_fence_await_dma_fence(&to->semaphore,
 					    &from->fence, 0,
 					    I915_FENCE_GFP);
-- 
cgit v1.2.3


From 2564fe708b580c1ef12b2b527ab6e8afe11ad444 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 4 May 2019 08:07:07 +0100
Subject: drm/i915: Disable semaphore busywaits on saturated systems

Asking the GPU to busywait on a memory address, perhaps not unexpectedly
in hindsight for a shared system, leads to bus contention that affects
CPU programs trying to concurrently access memory. This can manifest as
a drop in transcode throughput on highly over-saturated workloads.

The only clue offered by perf, is that the bus-cycles (perf stat -e
bus-cycles) jumped by 50% when enabling semaphores. This corresponds
with extra CPU active cycles being attributed to intel_idle's mwait.

This patch introduces a heuristic to try and detect when more than one
client is submitting to the GPU pushing it into an oversaturated state.
As we already keep track of when the semaphores are signaled, we can
inspect their state on submitting the busywait batch and if we planned
to use a semaphore but were too late, conclude that the GPU is
overloaded and not try to use semaphores in future requests. In
practice, this means we optimistically try to use semaphores for the
first frame of a transcode job split over multiple engines, and fail if
there are multiple clients active and continue not to use semaphores for
the subsequent frames in the sequence. Periodically, we try to
optimistically switch semaphores back on whenever the client waits to
catch up with the transcode results.

With 1 client, on Broxton J3455, with the relative fps normalized by %cpu:

x no semaphores
+ drm-tip
* patched
+------------------------------------------------------------------------+
|                                                    *                   |
|                                                    *+                  |
|                                                    **+                 |
|                                                    **+  x              |
|                                x               *  +**+  x              |
|                                x  x       *    *  +***x xx             |
|                                x  x       *    * *+***x *x             |
|                                x  x*   +  *    * *****x *x x           |
|                         +    x xx+x*   + ***   * ********* x   *       |
|                         +    x xx+x*   * *** +** ********* xx  *       |
|    *   +         ++++*  +    x*x****+*+* ***+*************+x*  *       |
|*+ +** *+ + +* + *++****** *xxx**********x***+*****************+*++    *|
|                                   |__________A_____M_____|             |
|                           |_______________A____M_________|             |
|                                 |____________A___M________|            |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x 120       2.60475       3.50941       3.31123     3.2143953    0.21117399
+ 120        2.3826       3.57077       3.25101     3.1414161    0.28146407
Difference at 95.0% confidence
	-0.0729792 +/- 0.0629585
	-2.27039% +/- 1.95864%
	(Student's t, pooled s = 0.248814)
* 120       2.35536       3.66713        3.2849     3.2059917    0.24618565
No difference proven at 95.0% confidence

With 10 clients over-saturating the pipeline:

x no semaphores
+ drm-tip
* patched
+------------------------------------------------------------------------+
|                     ++                                        **       |
|                     ++                                        **       |
|                     ++                                        **       |
|                     ++                                        **       |
|                     ++                                    xx ***       |
|                     ++                                    xx ***       |
|                     ++                                    xxx***       |
|                     ++                                    xxx***       |
|                    +++                                    xxx***       |
|                    +++                                    xx****       |
|                    +++                                    xx****       |
|                    +++                                    xx****       |
|                    +++                                    xx****       |
|                    ++++                                   xx****       |
|                   +++++                                   xx****       |
|                   +++++                                 x x******      |
|                  ++++++                                 xxx*******     |
|                  ++++++                                 xxx*******     |
|                  ++++++                                 xxx*******     |
|                  ++++++                                 xx********     |
|                  ++++++                               xxxx********     |
|                  ++++++                               xxxx********     |
|                ++++++++                             xxxxx*********     |
|+ +  +        + ++++++++                           xxx*xx**********x*  *|
|                                                         |__A__|        |
|                 |__AM__|                                               |
|                                                            |__A_|      |
+------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x 120       2.47855        2.8972       2.72376     2.7193402   0.074604933
+ 120       1.17367       1.77459       1.71977     1.6966782   0.085850697
Difference at 95.0% confidence
	-1.02266 +/- 0.0203502
	-37.607% +/- 0.748352%
	(Student's t, pooled s = 0.0804246)
* 120       2.57868       3.00821       2.80142     2.7923878   0.058646477
Difference at 95.0% confidence
	0.0730476 +/- 0.0169791
	2.68622% +/- 0.624383%
	(Student's t, pooled s = 0.0671018)

Indicating that we've recovered the regression from enabling semaphores
on this saturated setup, with a hint towards an overall improvement.

Very similar, but of smaller magnitude, results are observed on both
Skylake(gt2) and Kabylake(gt4). This may be due to the reduced impact of
bus-cycles, where we see a 50% hit on Broxton, it is only 10% on the big
core, in this particular test.

One observation to make here is that for a greedy client trying to
maximise its own throughput, using semaphores is the right choice. It is
only the holistic system-wide view that semaphores of one client
impacts another and reduces the overall throughput where we would choose
to disable semaphores.

The most noticeable negactive impact this has is on the no-op
microbenchmarks, which are also very notable for having no cpu bus load.
In particular, this increases the runtime and energy consumption of
gem_exec_whisper.

Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190504070707.30902-1-chris@chris-wilson.co.uk
(cherry picked from commit ca6e56f654e7b241256ffba78cd2abb22aa3bc97)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c        | 40 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_context.c       |  1 +
 drivers/gpu/drm/i915/intel_context_types.h |  3 +++
 3 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 20c7c77b3768..ce342f7f7ddb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
+	/*
+	 * Are we using semaphores when the gpu is already saturated?
+	 *
+	 * Using semaphores incurs a cost in having the GPU poll a
+	 * memory location, busywaiting for it to change. The continual
+	 * memory reads can have a noticeable impact on the rest of the
+	 * system with the extra bus traffic, stalling the cpu as it too
+	 * tries to access memory across the bus (perf stat -e bus-cycles).
+	 *
+	 * If we installed a semaphore on this request and we only submit
+	 * the request after the signaler completed, that indicates the
+	 * system is overloaded and using semaphores at this time only
+	 * increases the amount of work we are doing. If so, we disable
+	 * further use of semaphores until we are idle again, whence we
+	 * optimistically try again.
+	 */
+	if (request->sched.semaphores &&
+	    i915_sw_fence_signaled(&request->semaphore))
+		request->hw_context->saturated |= request->sched.semaphores;
+
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
@@ -813,6 +833,24 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 					     I915_FENCE_GFP);
 }
 
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+	/*
+	 * Polling a semaphore causes bus traffic, delaying other users of
+	 * both the GPU and CPU. We want to limit the impact on others,
+	 * while taking advantage of early submission to reduce GPU
+	 * latency. Therefore we restrict ourselves to not using more
+	 * than one semaphore from each source, and not using a semaphore
+	 * if we have detected the engine is saturated (i.e. would not be
+	 * submitted early and cause bus traffic reading an already passed
+	 * semaphore).
+	 *
+	 * See the are-we-too-late? check in __i915_request_submit().
+	 */
+	return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
 static int
 emit_semaphore_wait(struct i915_request *to,
 		    struct i915_request *from,
@@ -826,7 +864,7 @@ emit_semaphore_wait(struct i915_request *to,
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
 	/* Just emit the first semaphore we see as request space is limited. */
-	if (to->sched.semaphores & from->engine->mask)
+	if (already_busywaiting(to) & from->engine->mask)
 		return i915_sw_fence_await_dma_fence(&to->submit,
 						     &from->fence, 0,
 						     I915_FENCE_GFP);
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8931e0fee873..924cc556223a 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce,
 	ce->gem_context = ctx;
 	ce->engine = engine;
 	ce->ops = engine->cops;
+	ce->saturated = 0;
 
 	INIT_LIST_HEAD(&ce->signal_link);
 	INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 68b4ca1611e0..339c7437fe82 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #include "i915_active_types.h"
+#include "intel_engine_types.h"
 
 struct i915_gem_context;
 struct i915_vma;
@@ -58,6 +59,8 @@ struct intel_context {
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
+	intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
 	/**
 	 * active_tracker: Active tracker for the external rq activity
 	 * on this intel_context object.
-- 
cgit v1.2.3


From 992fbe8ce035d8c3fb2615ac6e8faeaa7c2fa2c3 Mon Sep 17 00:00:00 2001
From: Trigger Huang <Trigger.Huang@amd.com>
Date: Mon, 25 Feb 2019 19:19:56 +0800
Subject: drm/amdgpu: Use FW addr returned by PSP for VF MM

One Vega10 SR-IOV VF, the FW address returned by PSP should be
set into the init table, while not the original BO mc address.
otherwise, UVD and VCE IB test will fail under Vega10 SR-IOV

reference:
	commit bfcea5204287 ("drm/amdgpu:change VEGA booting with firmware loaded by PSP")
	commit aa5873dca463 ("drm/amdgpu: Change VCE booting with firmware loaded by PSP")

Signed-off-by: Trigger Huang <Trigger.Huang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 16 ++++++++++------
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 17 +++++++++++------
 2 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index dc461df48da0..2191d3d0a219 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -787,10 +787,13 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
 							   0xFFFFFFFF, 0x00000004);
 			/* mc resume*/
 			if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
-							    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
-							    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i,
+							mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+							adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo);
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i,
+							mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+							adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi);
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), 0);
 				offset = 0;
 			} else {
 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
@@ -798,10 +801,11 @@ static int uvd_v7_0_sriov_start(struct amdgpu_device *adev)
 				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
 							    upper_32_bits(adev->uvd.inst[i].gpu_addr));
 				offset = size;
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
+							AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+
 			}
 
-			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
-						    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
 
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index f3f5938430d4..c0ec27991c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -244,13 +244,18 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0);
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0);
 
+		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			uint32_t low = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo;
+			uint32_t hi = adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi;
+			uint64_t tmr_mc_addr = (uint64_t)(hi) << 32 | low;
+
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
-						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
-						adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8);
+						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), tmr_mc_addr >> 8);
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
-						(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff);
+						(tmr_mc_addr >> 40) & 0xff);
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0);
 		} else {
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR0),
@@ -258,6 +263,9 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR0),
 						(adev->vce.gpu_addr >> 40) & 0xff);
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
+						offset & ~0x0f000000);
+
 		}
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0,
 						mmVCE_LMI_VCPU_CACHE_40BIT_BAR1),
@@ -272,10 +280,7 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev)
 						mmVCE_LMI_VCPU_CACHE_64BIT_BAR2),
 						(adev->vce.gpu_addr >> 40) & 0xff);
 
-		offset = AMDGPU_VCE_FIRMWARE_OFFSET;
 		size = VCE_V4_0_FW_SIZE;
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0),
-					offset & ~0x0f000000);
 		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size);
 
 		offset = (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) ? offset + size : 0;
-- 
cgit v1.2.3


From db8a974f7e6966d73b4e6afa673c5b8bc31a111e Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Sun, 5 May 2019 11:00:50 +0800
Subject: drm/amd/powerplay: check for invalid profile_exit setting

profile_exit performance level setting is valid only
when current mode is in profile mode.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 95144e49c7f9..34471dbaa872 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -342,6 +342,16 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
 	if (current_level == level)
 		return count;
 
+	/* profile_exit setting is valid only when current mode is in profile mode */
+	if (!(current_level & (AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD |
+	    AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK |
+	    AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK |
+	    AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) &&
+	    (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) {
+		pr_err("Currently not in any profile mode!\n");
+		return -EINVAL;
+	}
+
 	if (is_support_sw_smu(adev)) {
 		mutex_lock(&adev->pm.mutex);
 		if (adev->pm.dpm.thermal_active) {
-- 
cgit v1.2.3


From 2e26ccb119bde03584be53406bbd22e711b0d6e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 6 May 2019 19:57:52 +0200
Subject: drm/radeon: prefer lower reference dividers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of the closest reference divider prefer the lowest,
this fixes flickering issues on HP Compaq nx9420.

Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=108514
Suggested-by: Paul Dufresne <dufresnep@gmail.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index aa898c699101..433df7036f96 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -922,12 +922,12 @@ static void avivo_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_div,
 	ref_div_max = max(min(100 / post_div, ref_div_max), 1u);
 
 	/* get matching reference and feedback divider */
-	*ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
+	*ref_div = min(max(den/post_div, 1u), ref_div_max);
 	*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
 
 	/* limit fb divider to its maximum */
 	if (*fb_div > fb_div_max) {
-		*ref_div = DIV_ROUND_CLOSEST(*ref_div * fb_div_max, *fb_div);
+		*ref_div = (*ref_div * fb_div_max)/(*fb_div);
 		*fb_div = fb_div_max;
 	}
 }
-- 
cgit v1.2.3


From 9d6fea5744d6798353f37ac42a8a653a2607ca69 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 8 May 2019 21:45:06 -0500
Subject: drm/amdgpu/psp: move psp version specific function pointers to
 early_init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case we need to use them for GPU reset prior initializing the
asic.  Fixes a crash if the driver attempts to reset the GPU at driver
load time.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 905cce1814f3..05897b05766b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -38,18 +38,10 @@ static void psp_set_funcs(struct amdgpu_device *adev);
 static int psp_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct psp_context *psp = &adev->psp;
 
 	psp_set_funcs(adev);
 
-	return 0;
-}
-
-static int psp_sw_init(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct psp_context *psp = &adev->psp;
-	int ret;
-
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
@@ -67,6 +59,15 @@ static int psp_sw_init(void *handle)
 
 	psp->adev = adev;
 
+	return 0;
+}
+
+static int psp_sw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct psp_context *psp = &adev->psp;
+	int ret;
+
 	ret = psp_init_microcode(psp);
 	if (ret) {
 		DRM_ERROR("Failed to load psp firmware!\n");
-- 
cgit v1.2.3


From 396dd8143bdd94bd1c358a228a631c8c895a1126 Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Tue, 23 Apr 2019 17:28:10 +0800
Subject: drm/i915/fbc: disable framebuffer compression on GeminiLake

On many (all?) the Gemini Lake systems we work with, there is frequent
momentary graphical corruption at the top of the screen, and it seems
that disabling framebuffer compression can avoid this.

The ticket was reported 6 months ago and has already affected a
multitude of users, without any real progress being made. So, lets
disable framebuffer compression on GeminiLake until a solution is found.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108085
Fixes: fd7d6c5c8f3e ("drm/i915: enable FBC on gen9+ too")
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.11+
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190423092810.28359-1-jian-hong@endlessm.com
(cherry picked from commit 1d25724b41fad7eeb2c3058a5c8190d6ece73e08)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_fbc.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index c805a0966395..5679f2fffb7c 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -1280,6 +1280,10 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
 	if (!HAS_FBC(dev_priv))
 		return 0;
 
+	/* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */
+	if (IS_GEMINILAKE(dev_priv))
+		return 0;
+
 	if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9)
 		return 1;
 
-- 
cgit v1.2.3


From da471250706e2f103a1627d1d279c9de44325993 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 25 Apr 2019 19:29:05 +0300
Subject: drm/i915: Fix fastset vs. pfit on/off on HSW EDP transcoder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On HSW the pipe A panel fitter lives inside the display power well,
and the input MUX for the EDP transcoder needs to be configured
appropriately to route the data through the power well as needed.
Changing the MUX setting is not allowed while the pipe is active,
so we need to force a full modeset whenever we need to change it.

Currently we may end up doing a fastset which won't change the
MUX settings, but it will drop the power well reference, and that
kills the pipe.

Cc: stable@vger.kernel.org
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Fixes: d19f958db23c ("drm/i915: Enable fastset for non-boot modesets.")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190425162906.5242-1-ville.syrjala@linux.intel.com
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
(cherry picked from commit 13b7648b7eab7e8259a2fb267b498bd9eba81ca0)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c  |  9 +++++++++
 drivers/gpu/drm/i915/intel_pipe_crc.c | 13 ++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3bd40a4a6739..5098228f1302 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12082,6 +12082,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 			  struct intel_crtc_state *pipe_config,
 			  bool adjust)
 {
+	struct intel_crtc *crtc = to_intel_crtc(current_config->base.crtc);
 	bool ret = true;
 	bool fixup_inherited = adjust &&
 		(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&
@@ -12303,6 +12304,14 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 		PIPE_CONF_CHECK_X(gmch_pfit.pgm_ratios);
 	PIPE_CONF_CHECK_X(gmch_pfit.lvds_border_bits);
 
+	/*
+	 * Changing the EDP transcoder input mux
+	 * (A_ONOFF vs. A_ON) requires a full modeset.
+	 */
+	if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A &&
+	    current_config->cpu_transcoder == TRANSCODER_EDP)
+		PIPE_CONF_CHECK_BOOL(pch_pfit.enabled);
+
 	if (!adjust) {
 		PIPE_CONF_CHECK_I(pipe_src_w);
 		PIPE_CONF_CHECK_I(pipe_src_h);
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index e94b5b1bc1b7..e7c7be4911c1 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -311,10 +311,17 @@ retry:
 	pipe_config->base.mode_changed = pipe_config->has_psr;
 	pipe_config->crc_enabled = enable;
 
-	if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A) {
+	if (IS_HASWELL(dev_priv) &&
+	    pipe_config->base.active && crtc->pipe == PIPE_A &&
+	    pipe_config->cpu_transcoder == TRANSCODER_EDP) {
+		bool old_need_power_well = pipe_config->pch_pfit.enabled ||
+			pipe_config->pch_pfit.force_thru;
+		bool new_need_power_well = pipe_config->pch_pfit.enabled ||
+			enable;
+
 		pipe_config->pch_pfit.force_thru = enable;
-		if (pipe_config->cpu_transcoder == TRANSCODER_EDP &&
-		    pipe_config->pch_pfit.enabled != enable)
+
+		if (old_need_power_well != new_need_power_well)
 			pipe_config->base.connectors_changed = true;
 	}
 
-- 
cgit v1.2.3


From c36beba6b296b3c05a0f29753b04775e5ae23886 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 8 May 2019 12:24:52 +0100
Subject: drm/i915: Seal races between async GPU cancellation, retirement and
 signaling

Currently there is an underlying assumption that i915_request_unsubmit()
is synchronous wrt the GPU -- that is the request is no longer in flight
as we remove it. In the near future that may change, and this may upset
our signaling as we can process an interrupt for that request while it
is no longer in flight.

CPU0					CPU1
intel_engine_breadcrumbs_irq
(queue request completion)
					i915_request_cancel_signaling
...					...
					i915_request_enable_signaling
dma_fence_signal

Hence in the time it took us to drop the lock to signal the request, a
preemption event may have occurred and re-queued the request. In the
process, that request would have seen I915_FENCE_FLAG_SIGNAL clear and
so reused the rq->signal_link that was in use on CPU0, leading to bad
pointer chasing in intel_engine_breadcrumbs_irq.

A related issue was that if someone started listening for a signal on a
completed but no longer in-flight request, we missed the opportunity to
immediately signal that request.

Furthermore, as intel_contexts may be immediately released during
request retirement, in order to be entirely sure that
intel_engine_breadcrumbs_irq may no longer dereference the intel_context
(ce->signals and ce->signal_link), we must wait for irq spinlock.

In order to prevent the race, we use a bit in the fence.flags to signal
the transfer onto the signal list inside intel_engine_breadcrumbs_irq.
For simplicity, we use the DMA_FENCE_FLAG_SIGNALED_BIT as it then
quickly signals to any outside observer that the fence is indeed signaled.

v2: Sketch out potential dma-fence API for manual signaling
v3: And the test_and_set_bit()

Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190508112452.18942-1-chris@chris-wilson.co.uk
(cherry picked from commit 0152b3b3f49b36b0f1a1bf9f0353dc636f41d8f0)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_request.c         |  1 +
 drivers/gpu/drm/i915/intel_breadcrumbs.c    | 78 +++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_guc_submission.c |  1 -
 3 files changed, 58 insertions(+), 22 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index ce342f7f7ddb..f6c78c0fa74b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -452,6 +452,7 @@ void __i915_request_submit(struct i915_request *request)
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 3cbffd400b1b..832cb6b1e9bd 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/kthread.h>
+#include <trace/events/dma_fence.h>
 #include <uapi/linux/sched/types.h>
 
 #include "i915_drv.h"
@@ -80,9 +81,39 @@ static inline bool __request_completed(const struct i915_request *rq)
 	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
 }
 
+static bool
+__dma_fence_signal(struct dma_fence *fence)
+{
+	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
+}
+
+static void
+__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
+{
+	fence->timestamp = timestamp;
+	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
+	trace_dma_fence_signaled(fence);
+}
+
+static void
+__dma_fence_signal__notify(struct dma_fence *fence)
+{
+	struct dma_fence_cb *cur, *tmp;
+
+	lockdep_assert_held(fence->lock);
+	lockdep_assert_irqs_disabled();
+
+	list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+		INIT_LIST_HEAD(&cur->node);
+		cur->func(fence, cur);
+	}
+	INIT_LIST_HEAD(&fence->cb_list);
+}
+
 void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	const ktime_t timestamp = ktime_get();
 	struct intel_context *ce, *cn;
 	struct list_head *pos, *next;
 	LIST_HEAD(signal);
@@ -104,6 +135,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 
 			GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
 					     &rq->fence.flags));
+			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+			if (!__dma_fence_signal(&rq->fence))
+				continue;
 
 			/*
 			 * Queue for execution after dropping the signaling
@@ -111,14 +146,6 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 			 * more signalers to the same context or engine.
 			 */
 			i915_request_get(rq);
-
-			/*
-			 * We may race with direct invocation of
-			 * dma_fence_signal(), e.g. i915_request_retire(),
-			 * so we need to acquire our reference to the request
-			 * before we cancel the breadcrumb.
-			 */
-			clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 			list_add_tail(&rq->signal_link, &signal);
 		}
 
@@ -141,7 +168,12 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 		struct i915_request *rq =
 			list_entry(pos, typeof(*rq), signal_link);
 
-		dma_fence_signal(&rq->fence);
+		__dma_fence_signal__timestamp(&rq->fence, timestamp);
+
+		spin_lock(&rq->lock);
+		__dma_fence_signal__notify(&rq->fence);
+		spin_unlock(&rq->lock);
+
 		i915_request_put(rq);
 	}
 }
@@ -243,19 +275,17 @@ void intel_engine_fini_breadcrumbs(struct intel_engine