From dd68f2ba0720e76c3a5bfa3f639c546f926792f5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 29 Mar 2017 13:13:15 +0100
Subject: drm/i915/execlists: Wrap tail pointer after reset tweaking

If the request->wa_tail is 0 (because it landed exactly on the end of
the ringbuffer), when we reconstruct request->tail following a reset we
fill in an illegal value (-8 or 0x001ffff8). As a result, RING_HEAD is
never able to catch up with RING_TAIL and the GPU spins endlessly. If
the ring contains a couple of breadcrumbs, even our hangcheck is unable
to catch the busy-looping as the ACTHD and seqno continually advance.

v2: Move the wrap into a common intel_ring_wrap().

Fixes: a3aabe86a340 ("drm/i915/execlists: Reinitialise context image after GPU hang")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala
Cc: <stable@vger.kernel.org> # v4.10+
Link: http://patchwork.freedesktop.org/patch/msgid/20170327130009.4678-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala
(cherry picked from commit 450362d3fe866b14304f309b5fffba0c33fbfbc3)
Signed-off-by: Jani Nikula
Link: http://patchwork.freedesktop.org/patch/msgid/20170329121315.1290-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/intel_lrc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 471af3b480ad..91555d4e9129 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1440,7 +1440,9 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	GEM_BUG_ON(request->ctx != port[0].request->ctx);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
+	request->tail =
+		intel_ring_wrap(request->ring,
+				request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
-- cgit v1.2.3
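
A note on the helper this fix leans on: intel_ring_wrap() folds the
computed tail back into the legal range of the ringbuffer. Below is a
minimal self-contained sketch of that idea; the struct is reduced to the
one field used here and the helper body is an assumption consistent with
how the fix uses it, not a quotation of the kernel source:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;

struct intel_ring { u32 size; };	/* reduced to the one field used here */

/* Fold a logical ring offset back into [0, ring->size).  Relies on the
 * ring size being a power of two, so the mask keeps the low bits and
 * discards any underflow in the tail arithmetic. */
static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
	return pos & (ring->size - 1);
}

int main(void)
{
	struct intel_ring ring = { .size = 4096 };

	/* The failure mode from the commit message: wa_tail == 0 makes
	 * wa_tail - WA_TAIL_DWORDS*sizeof(u32) underflow to (u32)-8 (the
	 * -8 in the message implies WA_TAIL_DWORDS is 2 here), a tail
	 * that RING_HEAD can never reach.  Wrapped, it becomes the last
	 * 8 bytes of the ring -- a legal position. */
	printf("%#x\n", intel_ring_wrap(&ring, 0u - 8u)); /* prints 0xff8 */
	return 0;
}
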
From 6c9a8cdad48a04795dbc35ac3370afa3180045ae Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 27 Mar 2017 21:21:43 +0100
Subject: drm/i915: Avoid lock dropping between rescheduling

Unlocking is dangerous. In this case we combine an early update to the
out-of-queue request, because we know that it will be inserted into the
correct FIFO priority-ordered slot when it becomes ready in the future.
However, given sufficient enthusiasm, it may become ready as we are
continuing to reschedule, and so may gazump the FIFO if we have since
dropped its spinlock. The result is that it may be executed too early,
before its dependencies.

v2: Move all work into the second phase over the topological sort. This
removes the shortcut on the out-of-rbtree request to ensure that we only
adjust its priority after adjusting all of its dependencies.

Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin
Cc: <stable@vger.kernel.org> # v4.10+
Link: http://patchwork.freedesktop.org/patch/msgid/20170327202143.7972-1-chris@chris-wilson.co.uk
Reviewed-by: Tvrtko Ursulin
(cherry picked from commit a79a524e9260d4ffaff88348615e70fb3d393692)
Signed-off-by: Jani Nikula
---
 drivers/gpu/drm/i915/intel_lrc.c | 53 ++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 91555d4e9129..47517a02f0a4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -670,15 +670,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 static struct intel_engine_cs *
 pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 {
-	struct intel_engine_cs *engine;
+	struct intel_engine_cs *engine =
+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+
+	GEM_BUG_ON(!locked);
 
-	engine = container_of(pt,
-			      struct drm_i915_gem_request,
-			      priotree)->engine;
 	if (engine != locked) {
-		if (locked)
-			spin_unlock_irq(&locked->timeline->lock);
-		spin_lock_irq(&engine->timeline->lock);
+		spin_unlock(&locked->timeline->lock);
+		spin_lock(&engine->timeline->lock);
 	}
 
 	return engine;
@@ -686,7 +685,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 
 static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 {
-	struct intel_engine_cs *engine = NULL;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
 	LIST_HEAD(dfs);
@@ -720,26 +719,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
 
-		list_for_each_entry(p, &pt->signalers_list, signal_link)
+		/* Within an engine, there can be no cycle, but we may
+		 * refer to the same dependency chain multiple times
+		 * (redundant dependencies are not eliminated) and across
+		 * engines.
+		 */
+		list_for_each_entry(p, &pt->signalers_list, signal_link) {
+			GEM_BUG_ON(p->signaler->priority < pt->priority);
 			if (prio > READ_ONCE(p->signaler->priority))
 				list_move_tail(&p->dfs_link, &dfs);
+		}
 
 		list_safe_reset_next(dep, p, dfs_link);
-		if (!RB_EMPTY_NODE(&pt->node))
-			continue;
-
-		engine = pt_lock_engine(pt, engine);
-
-		/* If it is not already in the rbtree, we can update the
-		 * priority inplace and skip over it (and its dependencies)
-		 * if it is referenced *again* as we descend the dfs.
-		 */
-		if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
-			pt->priority = prio;
-			list_del_init(&dep->dfs_link);
-		}
 	}
 
+	engine = request->engine;
+	spin_lock_irq(&engine->timeline->lock);
+
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
@@ -751,16 +747,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		if (prio <= pt->priority)
 			continue;
 
-		GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
-
 		pt->priority = prio;
-		rb_erase(&pt->node, &engine->execlist_queue);
-		if (insert_request(pt, &engine->execlist_queue))
-			engine->execlist_first = &pt->node;
+		if (!RB_EMPTY_NODE(&pt->node)) {
+			rb_erase(&pt->node, &engine->execlist_queue);
+			if (insert_request(pt, &engine->execlist_queue))
+				engine->execlist_first = &pt->node;
+		}
 	}
 
-	if (engine)
-		spin_unlock_irq(&engine->timeline->lock);
+	spin_unlock_irq(&engine->timeline->lock);
 
 	/* XXX Do we need to preempt to make room for us and our deps? */
 }
-- cgit v1.2.3
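
To see the shape of the fixed algorithm outside the kernel, here is a
small self-contained C sketch of the same two-phase rescheduling (all
names and structures are illustrative, none of this is i915 code):
phase one only collects the dependency chain into a worklist; phase two
applies the new priority in reverse order, dependencies first. Because
nothing is reprioritised until phase two, and in intel_lrc.c phase two
runs entirely under engine->timeline->lock, a freshly-boosted request
can no longer gazump the queue ahead of its still-unboosted
dependencies:

#include <stdio.h>

#define MAX_NODES 16

struct node {
	const char *name;
	int priority;
	struct node *signalers[4];	/* the nodes this one waits on */
	int nsignalers;
};

static void schedule(struct node *request, int prio)
{
	struct node *dfs[MAX_NODES];
	int head = 0, count = 0;

	dfs[count++] = request;

	/* Phase 1: topological collection only; no priority changes yet.
	 * As the patch's comment notes, redundant dependencies are not
	 * eliminated -- duplicates are simply revisited later. */
	while (head < count) {
		struct node *pt = dfs[head++];
		for (int i = 0; i < pt->nsignalers; i++) {
			struct node *s = pt->signalers[i];
			if (prio > s->priority && count < MAX_NODES)
				dfs[count++] = s;
		}
	}

	/* Phase 2: walk the list in reverse so every dependency is
	 * raised before anything that waits on it; the request that
	 * triggered the reschedule is bumped last.  In the kernel this
	 * loop runs under one irq-safe lock, which is the point of the
	 * fix. */
	for (int i = count - 1; i >= 0; i--) {
		struct node *pt = dfs[i];
		if (prio > pt->priority) {
			pt->priority = prio;
			printf("%s -> prio %d\n", pt->name, prio);
		}
	}
}

int main(void)
{
	struct node a = { "A", 0, { 0 }, 0 };		/* no dependencies */
	struct node b = { "B", 0, { &a }, 1 };		/* B waits on A */
	struct node c = { "C", 0, { &b, &a }, 2 };	/* C waits on B and A */

	schedule(&c, 10);	/* prints A, then B, then C */
	return 0;
}
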