summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c314
1 files changed, 217 insertions, 97 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 1785fdad6ecb..d78059fd2c72 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -35,6 +35,8 @@
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
+#include "smuio/smuio_11_0_0_offset.h"
+#include "smuio/smuio_11_0_0_sh_mask.h"
#include "navi10_enum.h"
#include "hdp/hdp_5_0_0_offset.h"
#include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
@@ -52,7 +54,7 @@
* 1. Primary ring
* 2. Async ring
*/
-#define GFX10_NUM_GFX_RINGS 2
+#define GFX10_NUM_GFX_RINGS_NV1X 1
#define GFX10_MEC_HPD_SIZE 2048
#define F32_CE_PROGRAM_RAM_SIZE 65536
@@ -222,6 +224,49 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
};
+static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
+{
+ static void *scratch_reg0;
+ static void *scratch_reg1;
+ static void *scratch_reg2;
+ static void *scratch_reg3;
+ static void *spare_int;
+ static uint32_t grbm_cntl;
+ static uint32_t grbm_idx;
+ uint32_t i = 0;
+ uint32_t retries = 50000;
+
+ scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
+ scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
+ scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
+ scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
+ spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;
+
+ grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
+ grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
+
+ if (amdgpu_sriov_runtime(adev)) {
+ pr_err("shouldn't call rlcg write register during runtime\n");
+ return;
+ }
+
+ writel(v, scratch_reg0);
+ writel(offset | 0x80000000, scratch_reg1);
+ writel(1, spare_int);
+ for (i = 0; i < retries; i++) {
+ u32 tmp;
+
+ tmp = readl(scratch_reg1);
+ if (!(tmp & 0x80000000))
+ break;
+
+ udelay(10);
+ }
+
+ if (i >= retries)
+ pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
+}
+
static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
{
/* Pending on emulation bring up */
@@ -234,7 +279,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] =
#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
- (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
+ (SH_MEM_ALIGNMENT_MODE_DWORD << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
(SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
(3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
@@ -500,29 +545,28 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
- uint32_t tmp = 0;
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r)
return r;
- }
-
- WREG32(scratch, 0xCAFEDEAD);
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ if (r)
goto err1;
- }
- ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
- ib.ptr[2] = 0xDEADBEEF;
- ib.length_dw = 3;
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
@@ -530,15 +574,13 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto err2;
} else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
}
- tmp = RREG32(scratch);
+ tmp = adev->wb.wb[index];
if (tmp == 0xDEADBEEF)
r = 0;
else
@@ -547,8 +589,7 @@ err2:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err1:
- amdgpu_gfx_scratch_free(adev, scratch);
-
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1016,6 +1057,10 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev)
return r;
}
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+
return 0;
}
@@ -1068,7 +1113,7 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev)
return r;
}
- memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
+ memset(hpd, 0, mec_hpd_size);
amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -1304,7 +1349,7 @@ static int gfx_v10_0_sw_init(void *handle)
case CHIP_NAVI14:
case CHIP_NAVI12:
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 2;
+ adev->gfx.me.num_pipe_per_me = 1;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -1783,11 +1828,11 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev)
adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* csib */
- WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI,
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI,
adev->gfx.rlc.clear_state_gpu_addr >> 32);
- WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO,
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO,
adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
- WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
+ WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
return 0;
}
@@ -1895,6 +1940,11 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
gfx_v10_0_rlc_enable_srm(adev);
} else {
+ if (amdgpu_sriov_vf(adev)) {
+ gfx_v10_0_init_csb(adev);
+ return 0;
+ }
+
adev->gfx.rlc.funcs->stop(adev);
/* disable CG */
@@ -2395,7 +2445,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
adev->gfx.gfx_ring[i].sched.ready = false;
}
- WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
+ WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
for (i = 0; i < adev->usec_timeout; i++) {
if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
@@ -2710,18 +2760,20 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_commit(ring);
/* submit cs packet to copy state 0 to next available state */
- ring = &adev->gfx.gfx_ring[1];
- r = amdgpu_ring_alloc(ring, 2);
- if (r) {
- DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
- return r;
- }
-
- amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
- amdgpu_ring_write(ring, 0);
+ if (adev->gfx.num_gfx_rings > 1) {
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ r = amdgpu_ring_alloc(ring, 2);
+ if (r) {
+ DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
+ return r;
+ }
- amdgpu_ring_commit(ring);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_commit(ring);
+ }
return 0;
}
@@ -2818,39 +2870,41 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
/* Init gfx ring 1 for pipe 1 */
- mutex_lock(&adev->srbm_mutex);
- gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
- ring = &adev->gfx.gfx_ring[1];
- rb_bufsz = order_base_2(ring->ring_size / 8);
- tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
- tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
- WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
- /* Initialize the ring buffer's write pointers */
- ring->wptr = 0;
- WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
- WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
- /* Set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
- WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
- WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
- CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
- wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
- lower_32_bits(wptr_gpu_addr));
- WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
- upper_32_bits(wptr_gpu_addr));
-
- mdelay(1);
- WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
-
- rb_addr = ring->gpu_addr >> 8;
- WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
- WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
- WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
-
- gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
- mutex_unlock(&adev->srbm_mutex);
-
+ if (adev->gfx.num_gfx_rings > 1) {
+ mutex_lock(&adev->srbm_mutex);
+ gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1);
+ /* maximum supported gfx ring is 2 */
+ ring = &adev->gfx.gfx_ring[1];
+ rb_bufsz = order_base_2(ring->ring_size / 8);
+ tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+ /* Initialize the ring buffer's write pointers */
+ ring->wptr = 0;
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr));
+ /* Set the wb address wether it's enabled or not */
+ rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) &
+ CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
+ wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO,
+ lower_32_bits(wptr_gpu_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI,
+ upper_32_bits(wptr_gpu_addr));
+
+ mdelay(1);
+ WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp);
+
+ rb_addr = ring->gpu_addr >> 8;
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr);
+ WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr));
+ WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1);
+
+ gfx_v10_0_cp_gfx_set_doorbell(adev, ring);
+ mutex_unlock(&adev->srbm_mutex);
+ }
/* Switch to pipe 0 */
mutex_lock(&adev->srbm_mutex);
gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0);
@@ -3164,12 +3218,7 @@ static int gfx_v10_0_kiq_enable_kgq(struct amdgpu_device *adev)
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.gfx_ring[i]);
- r = amdgpu_ring_test_ring(kiq_ring);
- if (r) {
- DRM_ERROR("kfq enable failed\n");
- kiq_ring->sched.ready = false;
- }
- return r;
+ return amdgpu_ring_test_helper(kiq_ring);
}
#endif
@@ -3212,6 +3261,22 @@ done:
return r;
}
+static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct v10_compute_mqd *mqd)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+ if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
+ mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+ ring->has_high_prio = true;
+ mqd->cp_hqd_queue_priority =
+ AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+ } else {
+ ring->has_high_prio = false;
+ }
+ }
+}
+
static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -3337,6 +3402,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
mqd->cp_hqd_ib_control = tmp;
+ /* set static priority for a compute queue/ring */
+ gfx_v10_0_compute_mqd_set_priority(ring, mqd);
+
/* map_queues packet doesn't need activate the queue,
* so only kiq need set this field.
*/
@@ -3513,6 +3581,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring)
/* reset ring buffer */
ring->wptr = 0;
+ atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
amdgpu_ring_clear_ring(ring);
} else {
amdgpu_ring_clear_ring(ring);
@@ -3785,7 +3854,7 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
PREEMPT_QUEUES, 0, 0);
- return amdgpu_ring_test_ring(kiq_ring);
+ return amdgpu_ring_test_helper(kiq_ring);
}
#endif
@@ -3923,11 +3992,12 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
+ amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->gfx.gpu_clock_mutex);
- WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
- clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
- ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+ clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) |
+ ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL);
mutex_unlock(&adev->gfx.gpu_clock_mutex);
+ amdgpu_gfx_off_ctrl(adev, true);
return clock;
}
@@ -3964,7 +4034,8 @@ static int gfx_v10_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS;
+ adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X;
+
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
gfx_v10_0_set_kiq_pm4_funcs(adev);
@@ -4033,6 +4104,12 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
/* It is disabled by HW by default */
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ /* 0 - Disable some blocks' MGCG */
+ WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+ WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000);
+ WREG32_SOC15(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xff000000);
+ WREG32_SOC15(GC, 0, mmCGTT_IA_CLK_CTRL, 0xff000000);
+
/* 1 - RLC_CGTT_MGCG_OVERRIDE */
def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
@@ -4072,19 +4149,20 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
if (def != data)
WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
- /* 2 - disable MGLS in RLC */
+ /* 2 - disable MGLS in CP */
+ data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
+ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
+ data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
+ WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
+ }
+
+ /* 3 - disable MGLS in RLC */
data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
}
- /* 3 - disable MGLS in CP */
- data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
- if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
- data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
- WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
- }
}
}
@@ -4195,7 +4273,7 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
/* === CGCG /CGLS for GFX 3D Only === */
gfx_v10_0_update_3d_clock_gating(adev, enable);
/* === MGCG + MGLS === */
- gfx_v10_0_update_medium_grain_clock_gating(adev, enable);
+ /* gfx_v10_0_update_medium_grain_clock_gating(adev, enable); */
}
if (adev->cg_flags &
@@ -4212,6 +4290,45 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
return 0;
}
+static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+{
+ u32 data;
+
+ data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
+
+ data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
+
+ WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
+}
+
+static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
+ uint32_t offset,
+ struct soc15_reg_rlcg *entries, int arr_size)
+{
+ int i;
+ uint32_t reg;
+
+ if (!entries)
+ return false;
+
+ for (i = 0; i < arr_size; i++) {
+ const struct soc15_reg_rlcg *entry;
+
+ entry = &entries[i];
+ reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
+ if (offset == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
+{
+ return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0);
+}
+
static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
.is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
.set_safe_mode = gfx_v10_0_set_safe_mode,
@@ -4222,7 +4339,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
.resume = gfx_v10_0_rlc_resume,
.stop = gfx_v10_0_rlc_stop,
.reset = gfx_v10_0_rlc_reset,
- .start = gfx_v10_0_rlc_start
+ .start = gfx_v10_0_rlc_start,
+ .update_spm_vmid = gfx_v10_0_update_spm_vmid,
+ .rlcg_wreg = gfx_v10_rlcg_wreg,
+ .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};
static int gfx_v10_0_set_powergating_state(void *handle,
@@ -4411,15 +4531,15 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
control |= ib->length_dw | (vmid << 24);
- if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+ if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
control |= INDIRECT_BUFFER_PRE_ENB(1);
if (flags & AMDGPU_IB_PREEMPTED)
control |= INDIRECT_BUFFER_PRE_RESUME(1);
- if (!(ib->flags & AMDGPU_IB_FLAG_CE))
+ if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
gfx_v10_0_ring_emit_de_meta(ring,
- flags & AMDGPU_IB_PREEMPTED ? true : false);
+ (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
amdgpu_ring_write(ring, header);
@@ -4566,9 +4686,9 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flag
{
uint32_t dw2 = 0;
- if (amdgpu_mcbp)
+ if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev))
gfx_v10_0_ring_emit_ce_meta(ring,
- flags & AMDGPU_IB_PREEMPTED ? true : false);
+ (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
gfx_v10_0_ring_emit_tmz(ring, true);