author     Dave Airlie <airlied@redhat.com>  2019-02-11 13:41:53 +1000
committer  Dave Airlie <airlied@redhat.com>  2019-02-11 13:41:59 +1000
commit     5ea3998d56346975c2701df18fb5b6e3ab5c8d9e (patch)
tree       07a9f5d74cc761cc73e66cefd0b7498341a8329f
parent     38f070eb125a9253ec1acd641e7159555f57132a (diff)
parent     c09d39166d8a3f3788680b32dbb0a40a70de32e2 (diff)
Merge tag 'drm-intel-next-2019-02-07' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
UAPI Changes:
- Expose RPCS (SSEU) configuration to userspace for Ice Lake in order to allow
  userspace to reconfigure the subslice configuration on a per-context basis.
  (Tvrtko, Lionel)

Driver Changes:
- Execbuf and preemption improvements including selftests (Chris)
- Rename HAS_GMCH_DISPLAY/HAS_GMCH (Rodrigo)
- Debugfs error handling fix for robustness (Greg)
- Improve reg_rw traces (Ville)
- Push clear_intel_crtc_state onto the heap (Chris)
- Watermark fixes for Ice Lake (Ville)
- Fix enable count array size and bounds checking (Tvrtko)
- MST Fixes (Lyude)
- Prevent race and handle error on I915_GEM_MMAP (Joonas)
- Initial rework for a full atomic gamma mode (Ville)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190208165000.GA30314@intel.com
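As an illustration of the new SSEU uapi (not part of the merge itself), the sketch below shows how userspace might reconfigure the subslice mask of a single GEM context. It is a minimal sketch under stated assumptions: the definitions this merge adds to include/uapi/drm/i915_drm.h (I915_CONTEXT_PARAM_SSEU, struct drm_i915_gem_context_param_sseu) are visible through the installed headers, the helper name set_context_sseu is invented for illustration, and error handling beyond the ioctl return values is omitted.

/*
 * Illustration only, not part of this commit: narrow the subslice mask of
 * one GEM context on an already-open drm fd. The zero-initialised engine
 * fields in the sseu struct address the render engine (class 0, instance 0).
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int set_context_sseu(int fd, uint32_t ctx_id, uint64_t subslice_mask)
{
	struct drm_i915_gem_context_param_sseu sseu;
	struct drm_i915_gem_context_param arg;

	memset(&sseu, 0, sizeof(sseu));
	memset(&arg, 0, sizeof(arg));

	arg.ctx_id = ctx_id;
	arg.param = I915_CONTEXT_PARAM_SSEU;
	arg.size = sizeof(sseu);
	arg.value = (uint64_t)(uintptr_t)&sseu;

	/* Read back the current (default) configuration for this engine. */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg))
		return -1;

	/* Request a smaller subslice mask; other limits keep their defaults. */
	sseu.subslice_mask = subslice_mask;

	return ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}

Valid masks are subsets of what the I915_PARAM_SLICE_MASK / I915_PARAM_SUBSLICE_MASK getparams report, and the driver rejects configurations the hardware cannot apply.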
-rw-r--r--  drivers/gpu/drm/i915/Makefile  4
-rw-r--r--  drivers/gpu/drm/i915/i915_active.c  286
-rw-r--r--  drivers/gpu/drm/i915/i915_active.h  425
-rw-r--r--  drivers/gpu/drm/i915/i915_active_types.h  36
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c  6
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c  12
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h  38
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  34
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.c  359
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.h  20
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  67
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_fence_reg.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c  5
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_object.h  2
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c  10
-rw-r--r--  drivers/gpu/drm/i915/i915_pci.c  14
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c  13
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c  22
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.h  2
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h  6
-rw-r--r--  drivers/gpu/drm/i915/i915_request.c  36
-rw-r--r--  drivers/gpu/drm/i915/i915_request.h  383
-rw-r--r--  drivers/gpu/drm/i915/i915_reset.c  2
-rw-r--r--  drivers/gpu/drm/i915/i915_suspend.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_timeline.c  4
-rw-r--r--  drivers/gpu/drm/i915/i915_timeline.h  30
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.c  183
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.h  11
-rw-r--r--  drivers/gpu/drm/i915/intel_color.c  247
-rw-r--r--  drivers/gpu/drm/i915/intel_device_info.h  2
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c  300
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c  31
-rw-r--r--  drivers/gpu/drm/i915/intel_drv.h  17
-rw-r--r--  drivers/gpu/drm/i915/intel_engine_cs.c  2
-rw-r--r--  drivers/gpu/drm/i915/intel_fbdev.c  33
-rw-r--r--  drivers/gpu/drm/i915/intel_fifo_underrun.c  6
-rw-r--r--  drivers/gpu/drm/i915/intel_guc_log.c  3
-rw-r--r--  drivers/gpu/drm/i915/intel_hdmi.c  6
-rw-r--r--  drivers/gpu/drm/i915/intel_hotplug.c  2
-rw-r--r--  drivers/gpu/drm/i915/intel_i2c.c  2
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c  61
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.h  2
-rw-r--r--  drivers/gpu/drm/i915/intel_overlay.c  33
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c  21
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c  13
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.h  36
-rw-r--r--  drivers/gpu/drm/i915/intel_sprite.c  54
-rw-r--r--  drivers/gpu/drm/i915/intel_uncore.c  3
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_active.c  157
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_context.c  467
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_live_selftests.h  3
-rw-r--r--  drivers/gpu/drm/i915/selftests/intel_lrc.c  103
-rw-r--r--  drivers/gpu/drm/i915/selftests/mock_timeline.c  3
-rw-r--r--  drivers/gpu/drm/i915/vlv_dsi.c  4
-rw-r--r--  include/uapi/drm/i915_drm.h  64
55 files changed, 2756 insertions, 937 deletions
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 210d0e8777b6..1787e1299b1b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -57,7 +57,9 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
# GEM code
-i915-y += i915_cmd_parser.o \
+i915-y += \
+ i915_active.o \
+ i915_cmd_parser.o \
i915_gem_batch_pool.o \
i915_gem_clflush.o \
i915_gem_context.o \
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
new file mode 100644
index 000000000000..215b6ff8aa73
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -0,0 +1,286 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_active.h"
+
+#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
+
+/*
+ * Active refs memory management
+ *
+ * To be more economical with memory, we reap all the i915_active trees as
+ * they idle (when we know the active requests are inactive) and allocate the
+ * nodes from a local slab cache to hopefully reduce the fragmentation.
+ */
+static struct i915_global_active {
+ struct kmem_cache *slab_cache;
+} global;
+
+struct active_node {
+ struct i915_active_request base;
+ struct i915_active *ref;
+ struct rb_node node;
+ u64 timeline;
+};
+
+static void
+__active_park(struct i915_active *ref)
+{
+ struct active_node *it, *n;
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ GEM_BUG_ON(i915_active_request_isset(&it->base));
+ kmem_cache_free(global.slab_cache, it);
+ }
+ ref->tree = RB_ROOT;
+}
+
+static void
+__active_retire(struct i915_active *ref)
+{
+ GEM_BUG_ON(!ref->count);
+ if (--ref->count)
+ return;
+
+ /* return the unused nodes to our slabcache */
+ __active_park(ref);
+
+ ref->retire(ref);
+}
+
+static void
+node_retire(struct i915_active_request *base, struct i915_request *rq)
+{
+ __active_retire(container_of(base, struct active_node, base)->ref);
+}
+
+static void
+last_retire(struct i915_active_request *base, struct i915_request *rq)
+{
+ __active_retire(container_of(base, struct i915_active, last));
+}
+
+static struct i915_active_request *
+active_instance(struct i915_active *ref, u64 idx)
+{
+ struct active_node *node;
+ struct rb_node **p, *parent;
+ struct i915_request *old;
+
+ /*
+ * We track the most recently used timeline to skip a rbtree search
+ * for the common case, under typical loads we never need the rbtree
+ * at all. We can reuse the last slot if it is empty, that is
+ * after the previous activity has been retired, or if it matches the
+ * current timeline.
+ *
+ * Note that we allow the timeline to be active simultaneously in
+ * the rbtree and the last cache. We do this to avoid having
+ * to search and replace the rbtree element for a new timeline, with
+ * the cost being that we must be aware that the ref may be retired
+ * twice for the same timeline (as the older rbtree element will be
+ * retired before the new request added to last).
+ */
+ old = i915_active_request_raw(&ref->last, BKL(ref));
+ if (!old || old->fence.context == idx)
+ goto out;
+
+ /* Move the currently active fence into the rbtree */
+ idx = old->fence.context;
+
+ parent = NULL;
+ p = &ref->tree.rb_node;
+ while (*p) {
+ parent = *p;
+
+ node = rb_entry(parent, struct active_node, node);
+ if (node->timeline == idx)
+ goto replace;
+
+ if (node->timeline < idx)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+
+ node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+
+ /* kmalloc may retire the ref->last (thanks shrinker)! */
+ if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
+ kmem_cache_free(global.slab_cache, node);
+ goto out;
+ }
+
+ if (unlikely(!node))
+ return ERR_PTR(-ENOMEM);
+
+ i915_active_request_init(&node->base, NULL, node_retire);
+ node->ref = ref;
+ node->timeline = idx;
+
+ rb_link_node(&node->node, parent, p);
+ rb_insert_color(&node->node, &ref->tree);
+
+replace:
+ /*
+ * Overwrite the previous active slot in the rbtree with last,
+ * leaving last zeroed. If the previous slot is still active,
+ * we must be careful as we now only expect to receive one retire
+ * callback not two, and so must undo the active counting for the
+ * overwritten slot.
+ */
+ if (i915_active_request_isset(&node->base)) {
+ /* Retire ourselves from the old rq->active_list */
+ __list_del_entry(&node->base.link);
+ ref->count--;
+ GEM_BUG_ON(!ref->count);
+ }
+ GEM_BUG_ON(list_empty(&ref->last.link));
+ list_replace_init(&ref->last.link, &node->base.link);
+ node->base.request = fetch_and_zero(&ref->last.request);
+
+out:
+ return &ref->last;
+}
+
+void i915_active_init(struct drm_i915_private *i915,
+ struct i915_active *ref,
+ void (*retire)(struct i915_active *ref))
+{
+ ref->i915 = i915;
+ ref->retire = retire;
+ ref->tree = RB_ROOT;
+ i915_active_request_init(&ref->last, NULL, last_retire);
+ ref->count = 0;
+}
+
+int i915_active_ref(struct i915_active *ref,
+ u64 timeline,
+ struct i915_request *rq)
+{
+ struct i915_active_request *active;
+
+ active = active_instance(ref, timeline);
+ if (IS_ERR(active))
+ return PTR_ERR(active);
+
+ if (!i915_active_request_isset(active))
+ ref->count++;
+ __i915_active_request_set(active, rq);
+
+ GEM_BUG_ON(!ref->count);
+ return 0;
+}
+
+bool i915_active_acquire(struct i915_active *ref)
+{
+ lockdep_assert_held(BKL(ref));
+ return !ref->count++;
+}
+
+void i915_active_release(struct i915_active *ref)
+{
+ lockdep_assert_held(BKL(ref));
+ __active_retire(ref);
+}
+
+int i915_active_wait(struct i915_active *ref)
+{
+ struct active_node *it, *n;
+ int ret = 0;
+
+ if (i915_active_acquire(ref))
+ goto out_release;
+
+ ret = i915_active_request_retire(&ref->last, BKL(ref));
+ if (ret)
+ goto out_release;
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ ret = i915_active_request_retire(&it->base, BKL(ref));
+ if (ret)
+ break;
+ }
+
+out_release:
+ i915_active_release(ref);
+ return ret;
+}
+
+int i915_request_await_active_request(struct i915_request *rq,
+ struct i915_active_request *active)
+{
+ struct i915_request *barrier =
+ i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
+
+ return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
+{
+ struct active_node *it, *n;
+ int ret;
+
+ ret = i915_request_await_active_request(rq, &ref->last);
+ if (ret)
+ return ret;
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ ret = i915_request_await_active_request(rq, &it->base);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+void i915_active_fini(struct i915_active *ref)
+{
+ GEM_BUG_ON(i915_active_request_isset(&ref->last));
+ GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
+ GEM_BUG_ON(ref->count);
+}
+#endif
+
+int i915_active_request_set(struct i915_active_request *active,
+ struct i915_request *rq)
+{
+ int err;
+
+ /* Must maintain ordering wrt previous active requests */
+ err = i915_request_await_active_request(rq, active);
+ if (err)
+ return err;
+
+ __i915_active_request_set(active, rq);
+ return 0;
+}
+
+void i915_active_retire_noop(struct i915_active_request *active,
+ struct i915_request *request)
+{
+ /* Space left intentionally blank */
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_active.c"
+#endif
+
+int __init i915_global_active_init(void)
+{
+ global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
+ if (!global.slab_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void __exit i915_global_active_exit(void)
+{
+ kmem_cache_destroy(global.slab_cache);
+}
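To make the new tracker's intended usage concrete, the following is a rough driver-internal sketch (illustration only, not part of this commit): a hypothetical object, here called frobber, embeds a struct i915_active, marks itself busy against requests from any number of timelines via i915_active_ref(), and receives a single retire callback once the last of them has retired. It assumes the caller holds struct_mutex, as the code above does via BKL(ref); the frobber names are invented for the example.

/*
 * Illustration only, not part of this commit: a hypothetical internal user
 * of the i915_active tracker added above.
 */
#include "i915_drv.h"
#include "i915_active.h"

struct frobber {
	struct i915_active active;
	/* ... resources that must not be released while the GPU uses them ... */
};

/* Called once the last tracked request on every timeline has retired. */
static void frobber_retire(struct i915_active *ref)
{
	struct frobber *f = container_of(ref, struct frobber, active);

	/* safe to release f's GPU-visible resources here */
	(void)f;
}

static void frobber_init(struct drm_i915_private *i915, struct frobber *f)
{
	i915_active_init(i915, &f->active, frobber_retire);
}

/* Mark f as busy until @rq (on whatever timeline it belongs to) retires. */
static int frobber_mark_active(struct frobber *f, struct i915_request *rq)
{
	/* caller holds struct_mutex */
	return i915_active_ref(&f->active, rq->fence.context, rq);
}

Before teardown, i915_active_wait(&f->active) retires any outstanding requests, and i915_active_fini() (built under CONFIG_DRM_I915_DEBUG_GEM) asserts that the tracker really has idled.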
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
new file mode 100644
index 000000000000..12b5c1d287d1
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -0,0 +1,425 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include <linux/lockdep.h>
+
+#include "i915_active_types.h"
+#include "i915_request.h"
+
+/*
+ * We treat requests as fences. This is not to be confused with our
+ * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
+ * We use the fences to synchronize access from the CPU with activity on the
+ * GPU, for example, we should not rewrite an object's PTE whilst the GPU
+ * is reading them. We also track fences at a higher level to provide
+ * implicit synchronisation around GEM objects, e.g. set-domain will wait
+ * for outstanding GPU rendering before marking the object ready for CPU
+ * access, or a pageflip will wait until the GPU is complete before showing
+ * the frame on the scanout.
+ *
+ * In order to use a fence, the object must track the fence it needs to
+ * serialise with. For example, GEM objects want to track both read and
+ * write access so that we can perform concurrent read operations between
+ * the CPU and GPU engines, as well as waiting for all rendering to
+ * complete, or waiting for the last GPU user of a "fence register". The
+ * object then embeds a #i915_active_request to track the most recent (in
+ * retirement order) request relevant for the desired mode of access.
+ * The #i915_active_request is updated with i915_active_request_set() to
+ * track the most recent fence request, typically this is done as part of
+ * i915_vma_move_to_active().
+ *
+ * When the #i915_active_request completes (is retired), it will
+ * signal its completion to the owner through a callback as well as mark
+ * itself as idle (i915_active_request.request == NULL). The owner
+ * can then perform any action, such as delayed freeing of an active
+ * resource including itself.
+ */
+
+void i915_active_retire_noop(struct i915_active_request *active,
+ struct i915_request *request);
+
+/**
+ * i915_active_request_init - prepares the activity tracker for use
+ * @active - the active tracker
+ * @rq - initial request to track, can be NULL
+ * @func - a callback invoked when the tracker is retired (becomes idle),
+ * can be NULL
+ *
+ * i915_active_request_init() prepares the embedded @active struct for use as
+ * an activity tracker, that is for tracking the last known active request
+ * associated with it. When the last request becomes idle, when it is retired
+ * after completion, the optional callback @func is invoked.
+ */
+static inline void
+i915_active_request_init(struct i915_active_request *active,
+ struct i915_request *rq,
+ i915_active_retire_fn retire)
+{
+ RCU_INIT_POINTER(active->request, rq);
+ INIT_LIST_HEAD(&active->link);
+ active->retire = retire ?: i915_active_retire_noop;
+}
+
+#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
+
+/**
+ * __i915_active_request_set - updates the tracker to watch the current request
+ * @active - the active tracker
+ * @request - the request to watch
+ *
+ * __i915_active_request_set() watches the given @request for completion. Whilst
+ * that @request is busy, the @active reports busy. When that @request is
+ * retired, the @active tracker is updated to report idle.
+ */
+static inline void
+__i915_active_request_set(struct i915_active_request *active,
+ struct i915_request *request)
+{
+ list_move(&active->link, &request->active_list);
+ rcu_assign_pointer(active->request, request);
+}
+
+int __must_check
+i915_active_request_set(struct i915_active_request *active,
+ struct i915_request *rq);
+
+/**
+ * i915_active_request_set_retire_fn - updates the retirement callback
+ * @active - the active tracker
+ * @fn - the routine called when the request is retired
+ * @mutex - struct_mutex used to guard retirements
+ *
+ * i915_active_request_set_retire_fn() updates the function pointer that
+ * is called when the final request associated with the @active tracker
+ * is retired.
+ */
+static inline void
+i915_active_request_set_retire_fn(struct i915_active_request *active,
+ i915_active_retire_fn fn,
+ struct mutex *mutex)
+{
+ lockdep_assert_held(mutex);
+ active->retire = fn ?: i915_active_retire_noop;
+}
+
+static inline struct i915_request *
+__i915_active_request_peek(const struct i915_active_request *active)
+{
+ /*
+ * Inside the error capture (running with the driver in an unknown
+ * state), we want to bend the rules slightly (a lot).
+ *
+ * Work is in progress to make it safer, in the meantime this keeps
+ * the known issue from spamming the logs.
+ */
+ return rcu_dereference_protected(active->request, 1);
+}
+
+/**
+ * i915_active_request_raw - return the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_raw() returns the current request being tracked, or NULL.
+ * It does not obtain a reference on the request for the caller, so the caller
+ * must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_raw(const struct i915_active_request *active,
+ struct mutex *mutex)
+{
+ return rcu_dereference_protected(active->request,
+ lockdep_is_held(mutex));
+}
+
+/**
+ * i915_active_request_peek - report the active request being monitored
+ * @active - the active tracker
+ *
+ * i915_active_request_peek() returns the current request being tracked if
+ * still active, or NULL. It does not obtain a reference on the request
+ * for the caller, so the caller must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_peek(const struct i915_active_request *active,
+ struct mutex *mutex)
+{
+ struct i915_request *request;
+
+ request = i915_active_request_raw(active, mutex);
+ if (!request || i915_request_completed(request))
+ return NULL;
+
+ return request;
+}
+
+/**
+ * i915_active_request_get - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_get() returns a reference to the active request, or NULL
+ * if the active tracker is idle. The caller must hold struct_mutex.
+ */
+static inline struct i915_request *
+i915_active_request_get(const struct i915_active_request *active,
+ struct mutex *mutex)
+{
+ return i915_request_get(i915_active_request_peek(active, mutex));
+}
+
+/**
+ * __i915_active_request_get_rcu - return a reference to the active request
+ * @active - the active tracker
+ *
+ * __i915_active_request_get_rcu() returns a reference to the active request,
+ * or NULL if the active tracker is idle. The caller must hold the RCU read
+ * lock, but the returned pointer is safe to use outside of RCU.
+ */
+static inline struct i915_request *
+__i915_active_request_get_rcu(const struct i915_active_request *active)
+{
+ /*
+ * Performing a lockless retrieval of the active request is super
+ * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
+ * slab of request objects will not be freed whilst we hold the
+ * RCU read lock. It does not guarantee that the request itself
+ * will not be freed and then *reused*. Viz,
+ *
+ * Thread A Thread B
+ *
+ * rq = active.request
+ * retire(rq) -> free(rq);
+ * (rq is now first on the slab freelist)
+ * active.request = NULL
+ *
+ * rq = new submission on a new object
+ * ref(rq)
+ *
+ * To prevent the request from being reused whilst the caller
+ * uses it, we take a reference like normal. Whilst acquiring
+ * the reference we check that it is not in a destroyed state
+ * (refcnt == 0). That prevents the request being reallocated
+ * whilst the caller holds on to it. To check that the request
+ * was not reallocated as we acquired the reference we have to
+ * check that our request remains the active request across
+ * the lookup, in the same manner as a seqlock. The visibility
+ * of the pointer versus the reference counting is controlled
+ * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
+ *
+ * In the middle of all that, we inspect whether the request is
+ * complete. Retiring is lazy so the request may be completed long
+ * before the active tracker is updated. Querying whether the
+ * request is complete is far cheaper (as it involves no locked
+ * instructions setting cachelines to exclusive) than acquiring
+ * the reference, so we do it first. The RCU read lock ensures the
+ * pointer dereference is valid, but does not ensure that the
+ * seqno nor HWS is the right one! However, if the request was
+ * reallocated, that means the active tracker's request was complete.
+ * If the new request is also complete, then both are and we can
+ * just report the active tracker is idle. If the new request is
+ * incomplete, then we acquire a reference on it and check that
+ * it remained the active request.
+ *
+ * It is then imperative that we do not zero the request on
+ * reallocation, so that we can chase the dangling pointers!
+ * See i915_request_alloc().
+ */
+ do {
+ struct i915_request *request;
+
+ request = rcu_dereference(active->request);
+ if (!request || i915_request_completed(request))
+ return NULL;
+
+ /*
+ * An especially silly compiler could decide to recompute the
+ * result of i915_request_completed, more specifically
+ * re-emit the load for request->fence.seqno. A race would catch
+ * a later seqno value, which could flip the result from true to
+ * false. Which means part of the instructions below might not
+ * be executed, while later on instructions are executed. Due to
+ * barriers within the refcounting the inconsistency can't reach
+ * past the call to i915_request_get_rcu, but not executing
+ * that while still executing i915_request_put() creates
+ * havoc enough. Prevent this with a compiler barrier.
+ */
+ barrier();
+
+ request = i915_request_get_rcu(request);
+
+ /*
+ * What stops the following rcu_access_pointer() from occurring
+ * before the above i915_request_get_rcu()? If we were
+ * to read the value before pausing to get the reference to
+ * the request, we may not notice a change in the active
+ * tracker.
+ *
+ * The rcu_access_pointer() is a mere compiler barrier, which
+ * means both the CPU and compiler are free to perform the
+ * memory read without constraint. The compiler only has to
+ * ensure that any operations after the rcu_access_pointer()
+ * occur afterwards in program order. This means the read may
+ * be performed earlier by an out-of-order CPU, or adventurous
+ * compiler.
+ *
+ * The atomic operation at the heart of
+ * i915_request_get_rcu(), see dma_fence_get_rcu(), is
+ * atomic_inc_not_zero() which is only a full memory barrier
+ * when successful. That is, if i915_request_get_rcu()
+ * returns the request (and so with the reference counted
+ * incremented) then the following read for rcu_access_pointer()
+ * must occur after the atomic operation and so confirm
+ * that this request is the one currently being tracked.
+ *
+ * The corresponding write barrier is part of
+ * rcu_assign_pointer().
+ */
+ if (!request || request == rcu_access_pointer(active->request))
+ return rcu_pointer_handoff(request);
+
+ i915_request_put(request);
+ } while (1);
+}
+
+/**
+ * i915_active_request_get_unlocked - return a reference to the active request
+ * @active - the active tracker
+ *
+ * i915_active_request_get_unlocked() returns a reference to the active request,
+ * or NULL if the active tracker is idle. The reference is obtained under RCU,
+ * so no locking is required by the caller.
+ *
+ * The reference should be freed with i915_request_put().
+ */
+static inline struct i915_request *
+i915_active_request_get_unlocked(const struct i915_active_request *active)
+{
+ struct i915_request *request;
+
+ rcu_read_lock();
+ request = __i915_active_request_get_rcu(active);
+ rcu_read_unlock();
+
+ return request;
+}
+
+/**
+ * i915_active_request_isset - report whether the active tracker is assigned
+ * @active - the active tracker
+ *
+ * i915_active_request_isset() returns true if the active tracker is currently
+ * assigned to a request. Due to the lazy retiring, that request may be idle
+ * and this may report stale information.
+ */
+static inline bool
+i915_active_request_isset(const struct i915_active_request *active)
+{
+ return rcu_access_pointer(active->request);
+}
+
+/**
+ * i915_active_request_retire - waits until the request is retired
+ * @active - the active request on which to wait
+ *
+ * i915_active_request_retire() waits until the re