diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 337 |
1 files changed, 302 insertions, 35 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 55f4b8c3b933..ba1086784525 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -99,6 +99,23 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 +#define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 +#define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 +#define mmVGT_TF_MEMORY_BASE_HI_Vangogh_BASE_IDX 1 +#define mmVGT_HS_OFFCHIP_PARAM_Vangogh 0x224f +#define mmVGT_HS_OFFCHIP_PARAM_Vangogh_BASE_IDX 1 +#define mmVGT_TF_RING_SIZE_Vangogh 0x224e +#define mmVGT_TF_RING_SIZE_Vangogh_BASE_IDX 1 +#define mmVGT_GSVS_RING_SIZE_Vangogh 0x2241 +#define mmVGT_GSVS_RING_SIZE_Vangogh_BASE_IDX 1 +#define mmVGT_TF_MEMORY_BASE_Vangogh 0x2250 +#define mmVGT_TF_MEMORY_BASE_Vangogh_BASE_IDX 1 +#define mmVGT_ESGS_RING_SIZE_Vangogh 0x2240 +#define mmVGT_ESGS_RING_SIZE_Vangogh_BASE_IDX 1 +#define mmSPI_CONFIG_CNTL_Vangogh 0x2440 +#define mmSPI_CONFIG_CNTL_Vangogh_BASE_IDX 1 + #define mmCP_HYP_PFP_UCODE_ADDR 0x5814 #define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1 #define mmCP_HYP_PFP_UCODE_DATA 0x5815 @@ -112,6 +129,13 @@ #define mmCP_HYP_ME_UCODE_DATA 0x5817 #define mmCP_HYP_ME_UCODE_DATA_BASE_IDX 1 +#define mmCPG_PSP_DEBUG 0x5c10 +#define mmCPG_PSP_DEBUG_BASE_IDX 1 +#define mmCPC_PSP_DEBUG 0x5c11 +#define mmCPC_PSP_DEBUG_BASE_IDX 1 +#define CPC_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L +#define CPG_PSP_DEBUG__GPA_OVERRIDE_MASK 0x00000008L + //CC_GC_SA_UNIT_DISABLE #define mmCC_GC_SA_UNIT_DISABLE 0x0fe9 #define mmCC_GC_SA_UNIT_DISABLE_BASE_IDX 0 @@ -131,6 +155,11 @@ #define mmCGTT_SPI_CS_CLK_CTRL 0x507c #define mmCGTT_SPI_CS_CLK_CTRL_BASE_IDX 1 +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid 0x16f3 +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -171,6 +200,20 @@ MODULE_FIRMWARE("amdgpu/navy_flounder_mec.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_mec2.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_rlc.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_ce.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_pfp.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_me.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_mec.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_mec2.bin"); +MODULE_FIRMWARE("amdgpu/vangogh_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ce.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_pfp.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_me.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_mec2.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -1366,23 +1409,14 @@ static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) { static void *scratch_reg0; static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; static void *spare_int; - static uint32_t grbm_cntl; - static uint32_t grbm_idx; uint32_t i = 0; uint32_t retries = 50000; scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; - scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; - scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4; spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; - grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; - grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; - if (amdgpu_sriov_runtime(adev)) { pr_err("shouldn't call rlcg write register during runtime\n"); return; @@ -3108,6 +3142,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3146,6 +3182,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), @@ -3154,6 +3191,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffffffff, 0xff008080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffff8fff, 0xff008080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3183,7 +3222,79 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), + + /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on Navy Flounder. */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000142), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), + + /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on VanGogh. */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000280, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0x07800000, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x00001d00, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003c0000, 0x00280400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0x40000000, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0x00040000, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0x01000000, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0x0000001f, 0x00180070), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER0_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER1_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER10_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER11_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER12_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER13_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER14_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER15_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER2_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER3_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER4_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER5_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER6_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) }; #define DEFAULT_SH_MEM_CONFIG \ @@ -3198,7 +3309,7 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info); + struct amdgpu_cu_info *cu_info); static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); @@ -3395,7 +3506,16 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_3_2, (const u32)ARRAY_SIZE(golden_settings_gc_10_3_2)); break; - + case CHIP_VANGOGH: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_vangogh, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_vangogh)); + break; + case CHIP_DIMGREY_CAVEFISH: + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_4, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4)); + break; default: break; } @@ -3579,6 +3699,8 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.cp_fw_write_wait = true; break; default: @@ -3646,7 +3768,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; - case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; default: @@ -3691,6 +3813,12 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case CHIP_NAVY_FLOUNDER: chip_name = "navy_flounder"; break; + case CHIP_VANGOGH: + chip_name = "vangogh"; + break; + case CHIP_DIMGREY_CAVEFISH: + chip_name = "dimgrey_cavefish"; + break; default: BUG(); } @@ -4206,11 +4334,26 @@ static void gfx_v10_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v10_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) - { - nv_grbm_select(adev, me, pipe, q, vm); - } + u32 me, u32 pipe, u32 q, u32 vm) +{ + nv_grbm_select(adev, me, pipe, q, vm); +} + +static void gfx_v10_0_update_perfmon_mgcg(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def; + + data = def = RREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL); + + if (enable) + data |= RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK; + else + data &= ~RLC_PERFMON_CLK_CNTL__PERFMON_CLOCK_STATE_MASK; + if (data != def) + WREG32_SOC15(GC, 0, mmRLC_PERFMON_CLK_CNTL, data); +} static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { .get_gpu_clock_counter = &gfx_v10_0_get_gpu_clock_counter, @@ -4220,6 +4363,7 @@ static const struct amdgpu_gfx_funcs gfx_v10_0_gfx_funcs = { .read_wave_vgprs = &gfx_v10_0_read_wave_vgprs, .select_me_pipe_q = &gfx_v10_0_select_me_pipe_q, .init_spm_golden = &gfx_v10_0_init_spm_golden_registers, + .update_perfmon_mgcg = &gfx_v10_0_update_perfmon_mgcg, }; static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) @@ -4241,6 +4385,8 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4334,7 +4480,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -4364,6 +4511,8 @@ static int gfx_v10_0_sw_init(void *handle) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4623,8 +4772,7 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade /* for ASICs that integrates GFX v10.3 * pa_sc_tile_steering_override should be set to 0 */ - if (adev->asic_type == CHIP_SIENNA_CICHLID || - adev->asic_type == CHIP_NAVY_FLOUNDER) + if (adev->asic_type >= CHIP_SIENNA_CICHLID) return 0; /* init num_sc */ @@ -4696,7 +4844,7 @@ static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); @@ -4866,7 +5014,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) return 0; } -void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); @@ -5847,20 +5995,24 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, { u32 tmp; - tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 0); + if (!amdgpu_async_gfx_ring) { + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); } - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -5994,6 +6146,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -6004,6 +6158,8 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -6098,6 +6254,8 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -6193,6 +6351,11 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) DOORBELL_EN, 0); mqd->cp_rb_doorbell_control = tmp; + /*if there are 2 gfx rings, set the lower doorbell range of the first ring, + *otherwise the range of the second ring will override the first ring */ + if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); @@ -6360,7 +6523,8 @@ static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -6806,6 +6970,7 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, 0); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); @@ -6818,6 +6983,8 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) return false; } break; + case CHIP_VANGOGH: + return true; default: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, 0); @@ -6845,6 +7012,8 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -6960,6 +7129,18 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmGRBM_CAM_DATA, data); } +static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev) +{ + uint32_t data; + data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG); + data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data); + + data = RREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG); + data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK; + WREG32_SOC15(GC, 0, mmCPG_PSP_DEBUG, data); +} + static int gfx_v10_0_hw_init(void *handle) { int r; @@ -6974,7 +7155,7 @@ static int gfx_v10_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. */ - if (adev->smu.ppt_funcs != NULL) { + if (adev->smu.ppt_funcs != NULL && !(adev->flags & AMD_IS_APU)) { r = smu_load_microcode(&adev->smu); if (r) return r; @@ -6985,6 +7166,7 @@ static int gfx_v10_0_hw_init(void *handle) return r; } } + gfx_v10_0_disable_gpa_mode(adev); } /* if GRBM CAM not remapped, set up the remapping */ @@ -7142,6 +7324,8 @@ static int gfx_v10_0_soft_reset(void *handle) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, @@ -7241,13 +7425,16 @@ static int gfx_v10_0_early_init(void *handle) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: break; } - adev->gfx.num_compute_rings = amdgpu_num_kcq; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); gfx_v10_0_set_kiq_pm4_funcs(adev); gfx_v10_0_set_ring_funcs(adev); @@ -7294,6 +7481,8 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7326,6 +7515,8 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -7350,6 +7541,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade /* 1 - RLC_CGTT_MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK | RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK); @@ -7486,12 +7678,50 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade } } +static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* unset FGCG override */ + data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + /* update FGCG override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL); + /* unset RLC SRAM CLK GATER override */ + data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + /* update RLC SRAM CLK GATER override bits */ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data); + } else { + def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); + /* reset FGCG bits */ + data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK; + /* disable FGCG*/ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); + + def = data = RREG32_SOC15(GC, 0, mmRLC_CLK_CNTL); + /* reset RLC SRAM CLK GATER bits */ + data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; + /* disable RLC SRAM CLK*/ + if (def != data) + WREG32_SOC15(GC, 0, mmRLC_CLK_CNTL, data); + } +} + static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable) { + /* enable FGCG firstly*/ + gfx_v10_0_update_fine_grain_clock_gating(adev, enable); /* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === */ @@ -7509,6 +7739,8 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v10_0_update_3d_clock_gating(adev, enable); /* === MGCG + MGLS === */ gfx_v10_0_update_medium_grain_clock_gating(adev, enable); + /* disable fgcg at last*/ + gfx_v10_0_update_fine_grain_clock_gating(adev, enable); } if (adev->cg_flags & @@ -7570,6 +7802,27 @@ static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offse return gfx_v10_0_check_rlcg_range(adev, offset, NULL, 0); } +static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) +{ + u32 data = RREG32_SOC15(GC, 0, mmRLC_PG_CNTL); + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + + WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, data); +} + +static void gfx_v10_cntl_pg(struct amdgpu_device *adev, bool enable) +{ + amdgpu_gfx_rlc_enter_safe_mode(adev); + + gfx_v10_cntl_power_gating(adev, enable); + + amdgpu_gfx_rlc_exit_safe_mode(adev); +} + static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = { .is_rlc_enabled = gfx_v10_0_is_rlc_enabled, .set_safe_mode = gfx_v10_0_set_safe_mode, @@ -7615,8 +7868,12 @@ static int gfx_v10_0_set_powergating_state(void *handle, case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: amdgpu_gfx_off_ctrl(adev, enable); break; + case CHIP_VANGOGH: + gfx_v10_cntl_pg(adev, enable); + break; default: break; } @@ -7637,6 +7894,8 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -7651,6 +7910,11 @@ static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; + /* AMD_CG_SUPPORT_GFX_FGCG */ + data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_FGCG; + /* AMD_CG_SUPPORT_GFX_MGCG */ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE)); if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) @@ -8400,6 +8664,7 @@ static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + break; default: break; } @@ -8739,6 +9004,8 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case CHIP_NAVI14: case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_VANGOGH: + case CHIP_DIMGREY_CAVEFISH: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case CHIP_NAVI12: |