summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2016-03-18 07:16:25 +1000
committerDave Airlie <airlied@redhat.com>2016-03-18 07:16:25 +1000
commit902d02db1ff1b0d0075276917a36ba70847798a7 (patch)
tree2dbd296578edf17b7abd5d2ecde1b1f2f1eb5e10 /drivers/gpu/drm
parente6087877794520748f7295212a4c6bdb870122f2 (diff)
parentb9c743b85dc378510ef0e5ebe3c2e4ac1495c410 (diff)
Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next
A few other misc cleanups and bug fixes for 4.6. Highlights: - unify endian handling in powerplay - powerplay fixes - fix a regression in 4.5 on boards with no display connectors - fence cleanups and locking fixes - whitespace cleanups and code refactoring in radeon * 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (35 commits) drm/amdgpu/gfx7: add MTYPE definition drm/amdgpu: removing BO_VAs shouldn't be interruptible drm/amd/powerplay: show uvd/vce power gate enablement for tonga. drm/amd/powerplay: show uvd/vce power gate info for fiji drm/amdgpu: use sched fence if possible drm/amdgpu: move ib.fence to job.fence drm/amdgpu: give a fence param to ib_free drm/amdgpu: include the right version of gmc header files for iceland drm/radeon: fix indentation. drm/amd/powerplay: add uvd/vce dpm enabling flag to fix the performance issue for CZ drm/amdgpu: switch back to 32bit hw fences v2 drm/amdgpu: remove amdgpu_fence_is_signaled drm/amdgpu: drop the extra fence range check v2 drm/amdgpu: signal fences directly in amdgpu_fence_process drm/amdgpu: cleanup amdgpu_fence_wait_empty v2 drm/amdgpu: keep all fences in an RCU protected array v2 drm/amdgpu: add number of hardware submissions to amdgpu_fence_driver_init_ring drm/amdgpu: RCU protected amd_sched_fence_release drm/amdgpu: RCU protected amdgpu_fence_release drm/amdgpu: merge amdgpu_fence_process and _activity ...
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h47
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c375
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c53
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c3
-rw-r--r--drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h6
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h12
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h12
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/pp_endian.h38
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/smumgr.h21
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h18
-rw-r--r--drivers/gpu/drm/amd/scheduler/sched_fence.c23
-rw-r--r--drivers/gpu/drm/radeon/atom.c7
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c6
-rw-r--r--drivers/gpu/drm/radeon/atombios_dp.c4
-rw-r--r--drivers/gpu/drm/radeon/btc_dpm.c41
-rw-r--r--drivers/gpu/drm/radeon/ci_dpm.c42
-rw-r--r--drivers/gpu/drm/radeon/ci_smc.c8
-rw-r--r--drivers/gpu/drm/radeon/cik.c1697
-rw-r--r--drivers/gpu/drm/radeon/cypress_dpm.c8
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c2
-rw-r--r--drivers/gpu/drm/radeon/evergreen_cs.c32
-rw-r--r--drivers/gpu/drm/radeon/evergreen_hdmi.c2
-rw-r--r--drivers/gpu/drm/radeon/kv_dpm.c4
-rw-r--r--drivers/gpu/drm/radeon/ni.c4
-rw-r--r--drivers/gpu/drm/radeon/ni_dpm.c170
-rw-r--r--drivers/gpu/drm/radeon/r600.c8
-rw-r--r--drivers/gpu/drm/radeon/r600_cs.c20
-rw-r--r--drivers/gpu/drm/radeon/r600_dpm.c6
-rw-r--r--drivers/gpu/drm/radeon/r600_hdmi.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c8
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c12
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c25
-rw-r--r--drivers/gpu/drm/radeon/radeon_ib.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_encoders.c92
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_semaphore.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_uvd.c8
-rw-r--r--drivers/gpu/drm/radeon/radeon_vce.c22
-rw-r--r--drivers/gpu/drm/radeon/radeon_vm.c19
-rw-r--r--drivers/gpu/drm/radeon/rs780_dpm.c2
-rw-r--r--drivers/gpu/drm/radeon/rv6xx_dpm.c18
-rw-r--r--drivers/gpu/drm/radeon/rv740_dpm.c16
-rw-r--r--drivers/gpu/drm/radeon/rv770_dpm.c46
-rw-r--r--drivers/gpu/drm/radeon/si.c969
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c98
-rw-r--r--drivers/gpu/drm/radeon/sumo_dpm.c6
-rw-r--r--drivers/gpu/drm/radeon/trinity_dpm.c24
-rw-r--r--drivers/gpu/drm/radeon/vce_v2_0.c2
64 files changed, 1844 insertions, 2332 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a80c8cea7609..c4a21c6428f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -141,7 +141,6 @@ extern unsigned amdgpu_pcie_lane_cap;
#define CIK_CURSOR_HEIGHT 128
struct amdgpu_device;
-struct amdgpu_fence;
struct amdgpu_ib;
struct amdgpu_vm;
struct amdgpu_ring;
@@ -348,13 +347,15 @@ struct amdgpu_fence_driver {
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
- uint64_t sync_seq;
- atomic64_t last_seq;
+ uint32_t sync_seq;
+ atomic_t last_seq;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
struct timer_list fallback_timer;
- wait_queue_head_t fence_queue;
+ unsigned num_fences_mask;
+ spinlock_t lock;
+ struct fence **fences;
};
/* some special values for the owner field */
@@ -364,16 +365,6 @@ struct amdgpu_fence_driver {
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
-struct amdgpu_fence {
- struct fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
- uint64_t seq;
-
- wait_queue_t fence_wake;
-};
-
struct amdgpu_user_fence {
/* write-back bo */
struct amdgpu_bo *bo;
@@ -385,7 +376,8 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
@@ -393,7 +385,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
void amdgpu_fence_process(struct amdgpu_ring *ring);
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
@@ -539,11 +530,14 @@ int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
* Assumption is that there won't be hole (all object on same
* alignment).
*/
+
+#define AMDGPU_SA_NUM_FENCE_LISTS 32
+
struct amdgpu_sa_manager {
wait_queue_head_t wq;
struct amdgpu_bo *bo;
struct list_head *hole;
- struct list_head flist[AMDGPU_MAX_RINGS];
+ struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
struct list_head olist;
unsigned size;
uint64_t gpu_addr;
@@ -727,7 +721,6 @@ struct amdgpu_ib {
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
- struct fence *fence;
struct amdgpu_user_fence *user;
struct amdgpu_vm *vm;
unsigned vm_id;
@@ -1143,7 +1136,7 @@ struct amdgpu_gfx {
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
struct amdgpu_ib *ib, struct fence *last_vm_update,
struct fence **f);
@@ -1164,7 +1157,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src, unsigned irq_type,
enum amdgpu_ring_type ring_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
/*
* CS.
@@ -1206,6 +1198,7 @@ struct amdgpu_job {
struct amdgpu_ring *ring;
struct amdgpu_sync sync;
struct amdgpu_ib *ibs;
+ struct fence *fence; /* the hw fence */
uint32_t num_ibs;
void *owner;
struct amdgpu_user_fence uf;
@@ -2067,20 +2060,6 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
/*
- * Cast helper
- */
-extern const struct fence_ops amdgpu_fence_ops;
-static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
-{
- struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
-
- if (__f->base.ops == &amdgpu_fence_ops)
- return __f;
-
- return NULL;
-}
-
-/*
* Registers read & write functions.
*/
#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 83599f2a0387..4303b447efe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,9 +47,30 @@
* that the the relevant GPU caches have been flushed.
*/
+struct amdgpu_fence {
+ struct fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+};
+
static struct kmem_cache *amdgpu_fence_slab;
static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+/*
+ * Cast helper
+ */
+static const struct fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+{
+ struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+ if (__f->base.ops == &amdgpu_fence_ops)
+ return __f;
+
+ return NULL;
+}
+
/**
* amdgpu_fence_write - write a fence value
*
@@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
if (drv->cpu_addr)
seq = le32_to_cpu(*drv->cpu_addr);
else
- seq = lower_32_bits(atomic64_read(&drv->last_seq));
+ seq = atomic_read(&drv->last_seq);
return seq;
}
@@ -100,20 +121,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_fence *fence;
+ struct fence **ptr;
+ uint32_t seq;
fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
if (fence == NULL)
return -ENOMEM;
- fence->seq = ++ring->fence_drv.sync_seq;
+ seq = ++ring->fence_drv.sync_seq;
fence->ring = ring;
fence_init(&fence->base, &amdgpu_fence_ops,
- &ring->fence_drv.fence_queue.lock,
+ &ring->fence_drv.lock,
adev->fence_context + ring->idx,
- fence->seq);
+ seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
- fence->seq, AMDGPU_FENCE_FLAG_INT);
+ seq, AMDGPU_FENCE_FLAG_INT);
+
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+ BUG_ON(rcu_dereference_protected(*ptr, 1));
+
+ rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
*f = &fence->base;
+
return 0;
}
@@ -131,89 +164,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
}
/**
- * amdgpu_fence_activity - check for fence activity
+ * amdgpu_fence_process - check for fence activity
*
* @ring: pointer to struct amdgpu_ring
*
* Checks the current fence value and calculates the last
- * signalled fence value. Returns true if activity occured
- * on the ring, and the fence_queue should be waken up.
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
*/
-static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
{
- uint64_t seq, last_seq, last_emitted;
- unsigned count_loop = 0;
- bool wake = false;
-
- /* Note there is a scenario here for an infinite loop but it's
- * very unlikely to happen. For it to happen, the current polling
- * process need to be interrupted by another process and another
- * process needs to update the last_seq btw the atomic read and
- * xchg of the current process.
- *
- * More over for this to go in infinite loop there need to be
- * continuously new fence signaled ie amdgpu_fence_read needs
- * to return a different value each time for both the currently
- * polling process and the other process that xchg the last_seq
- * btw atomic read and xchg of the current process. And the
- * value the other process set as last seq must be higher than
- * the seq value we just read. Which means that current process
- * need to be interrupted after amdgpu_fence_read and before
- * atomic xchg.
- *
- * To be even more safe we count the number of time we loop and
- * we bail after 10 loop just accepting the fact that we might
- * have temporarly set the last_seq not to the true real last
- * seq but to an older one.
- */
- last_seq = atomic64_read(&ring->fence_drv.last_seq);
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ uint32_t seq, last_seq;
+ int r;
+
do {
- last_emitted = ring->fence_drv.sync_seq;
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
seq = amdgpu_fence_read(ring);
- seq |= last_seq & 0xffffffff00000000LL;
- if (seq < last_seq) {
- seq &= 0xffffffff;
- seq |= last_emitted & 0xffffffff00000000LL;
- }
- if (seq <= last_seq || seq > last_emitted) {
- break;
- }
- /* If we loop over we don't want to return without
- * checking if a fence is signaled as it means that the
- * seq we just read is different from the previous on.
- */
- wake = true;
- last_seq = seq;
- if ((count_loop++) > 10) {
- /* We looped over too many time leave with the
- * fact that we might have set an older fence
- * seq then the current real last seq as signaled
- * by the hw.
- */
- break;
- }
- } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (seq < last_emitted)
+ if (seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
- return wake;
-}
+ while (last_seq != seq) {
+ struct fence *fence, **ptr;
-/**
- * amdgpu_fence_process - process a fence
- *
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
- *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
- */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
-{
- if (amdgpu_fence_activity(ring))
- wake_up_all(&ring->fence_drv.fence_queue);
+ ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+ /* There is always exactly one thread signaling this fence slot */
+ fence = rcu_dereference_protected(*ptr, 1);
+ rcu_assign_pointer(*ptr, NULL);
+
+ BUG_ON(!fence);
+
+ r = fence_signal(fence);
+ if (!r)
+ FENCE_TRACE(fence, "signaled from irq context\n");
+ else
+ BUG();
+
+ fence_put(fence);
+ }
}
/**
@@ -231,77 +223,6 @@ static void amdgpu_fence_fallback(unsigned long arg)
}
/**
- * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
- *
- * @ring: ring the fence is associated with
- * @seq: sequence number
- *
- * Check if the last signaled fence sequnce number is >= the requested
- * sequence number (all asics).
- * Returns true if the fence has signaled (current fence value
- * is >= requested value) or false if it has not (current fence
- * value is < the requested value. Helper function for
- * amdgpu_fence_signaled().
- */
-static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
-{
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- /* poll new last sequence at least once */
- amdgpu_fence_process(ring);
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- return false;
-}
-
-/*
- * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
- * @ring: ring to wait on for the seq number
- * @seq: seq number wait for
- *
- * return value:
- * 0: seq signaled, and gpu not hang
- * -EINVAL: some paramter is not valid
- */
-static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
-{
- BUG_ON(!ring);
- if (seq > ring->fence_drv.sync_seq)
- return -EINVAL;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return 0;
-
- amdgpu_fence_schedule_fallback(ring);
- wait_event(ring->fence_drv.fence_queue,
- amdgpu_fence_seq_signaled(ring, seq));
-
- return 0;
-}
-
-/**
- * amdgpu_fence_wait_next - wait for the next fence to signal
- *
- * @adev: amdgpu device pointer
- * @ring: ring index the fence is associated with
- *
- * Wait for the next fence on the requested ring to signal (all asics).
- * Returns 0 if the next fence has passed, error for all other cases.
- * Caller must hold ring lock.
- */
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
-{
- uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
-
- if (seq >= ring->fence_drv.sync_seq)
- return -ENOENT;
-
- return amdgpu_fence_ring_wait_seq(ring, seq);
-}
-
-/**
* amdgpu_fence_wait_empty - wait for all fences to signal
*
* @adev: amdgpu device pointer
@@ -309,16 +230,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
*
* Wait for all fences on the requested ring to signal (all asics).
* Returns 0 if the fences have passed, error for all other cases.
- * Caller must hold ring lock.
*/
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
- uint64_t seq = ring->fence_drv.sync_seq;
+ uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
+ struct fence *fence, **ptr;
+ int r;
if (!seq)
return 0;
- return amdgpu_fence_ring_wait_seq(ring, seq);
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ rcu_read_lock();
+ fence = rcu_dereference(*ptr);
+ if (!fence || !fence_get_rcu(fence)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ r = fence_wait(fence, false);
+ fence_put(fence);
+ return r;
}
/**
@@ -338,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
* but it's ok to report slightly wrong fence count here.
*/
amdgpu_fence_process(ring);
- emitted = ring->fence_drv.sync_seq
- - atomic64_read(&ring->fence_drv.last_seq);
- /* to avoid 32bits warp around */
- if (emitted > 0x10000000)
- emitted = 0x10000000;
-
- return (unsigned)emitted;
+ emitted = 0x100000000ull;
+ emitted -= atomic_read(&ring->fence_drv.last_seq);
+ emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+ return lower_32_bits(emitted);
}
/**
@@ -376,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
}
- amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);
ring->fence_drv.irq_src = irq_src;
@@ -394,25 +324,36 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
+ * @num_hw_submission: number of entries on the hardware queue
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission)
{
long timeout;
int r;
+ /* Check that num_hw_submission is a power of two */
+ if ((num_hw_submission & (num_hw_submission - 1)) != 0)
+ return -EINVAL;
+
ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0;
ring->fence_drv.sync_seq = 0;
- atomic64_set(&ring->fence_drv.last_seq, 0);
+ atomic_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
(unsigned long)ring);
- init_waitqueue_head(&ring->fence_drv.fence_queue);
+ ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+ spin_lock_init(&ring->fence_drv.lock);
+ ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
+ GFP_KERNEL);
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
if (timeout == 0) {
@@ -426,7 +367,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
timeout = MAX_SCHEDULE_TIMEOUT;
}
r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
- amdgpu_sched_hw_submission,
+ num_hw_submission,
timeout, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
@@ -474,10 +415,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
- int i, r;
+ unsigned i, j;
+ int r;
- if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
- kmem_cache_destroy(amdgpu_fence_slab);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -488,13 +428,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
/* no need to trigger GPU reset as we are unloading */
amdgpu_fence_driver_force_completion(adev);
}
- wake_up_all(&ring->fence_drv.fence_queue);
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
amd_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)