diff options
author | Oded Gabbay <oded.gabbay@gmail.com> | 2019-02-16 00:39:17 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2019-02-18 09:46:45 +0100 |
commit | 9494a8dd8d22cbff8ce358aaa223fffe1b070cb0 (patch) | |
tree | 8a7b51e6440aa5248026140fa33d42172dde26de | |
parent | 839c48030d27a690cc85f0762f9f6f07a3349fb3 (diff) |
habanalabs: add h/w queues module
This patch adds the H/W queues module and the code to initialize Goya's
various compute and DMA engines and their queues.
Goya has 5 DMA channels, 8 TPC engines and a single MME engine. For each
channel/engine, there is a H/W queue logic which is used to pass commands
from the user to the H/W. That logic is called QMAN.
There are two types of QMANs: external and internal. The DMA QMANs are
considered external while the TPC and MME QMANs are considered internal.
For each external queue there is a completion queue, which is located on
the Host memory.
The differences between external and internal QMANs are:
1. The location of the queue's memory. External QMANs are located on the
Host memory while internal QMANs are located on the on-chip memory.
2. The external QMAN write an entry to a completion queue and sends an
MSI-X interrupt upon completion of a command buffer that was given to
it. The internal QMAN doesn't do that.
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/misc/habanalabs/Makefile | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/device.c | 75 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 1311 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goyaP.h | 7 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 174 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs_drv.c | 5 | ||||
-rw-r--r-- | drivers/misc/habanalabs/hw_queue.c | 400 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/armcp_if.h | 292 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/goya/goya_async_events.h | 186 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/goya/goya_packets.h | 129 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/qman_if.h | 56 | ||||
-rw-r--r-- | drivers/misc/habanalabs/irq.c | 149 | ||||
-rw-r--r-- | include/uapi/misc/habanalabs.h | 29 |
13 files changed, 2807 insertions, 8 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index 2530c9b78ca4..c07f3ccb57dc 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -5,7 +5,7 @@ obj-m := habanalabs.o habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \ - command_buffer.o + command_buffer.o hw_queue.o irq.o include $(src)/goya/Makefile habanalabs-y += $(HL_GOYA_FILES) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 5eefc2952be5..06e2b7f32499 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -174,13 +174,23 @@ static int device_early_init(struct hl_device *hdev) if (rc) goto early_fini; + hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0); + if (hdev->cq_wq == NULL) { + dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); + rc = -ENOMEM; + goto asid_fini; + } + hl_cb_mgr_init(&hdev->kernel_cb_mgr); mutex_init(&hdev->fd_open_cnt_lock); + mutex_init(&hdev->send_cpu_message_lock); atomic_set(&hdev->fd_open_cnt, 0); return 0; +asid_fini: + hl_asid_fini(hdev); early_fini: if (hdev->asic_funcs->early_fini) hdev->asic_funcs->early_fini(hdev); @@ -196,9 +206,12 @@ early_fini: */ static void device_early_fini(struct hl_device *hdev) { + mutex_destroy(&hdev->send_cpu_message_lock); hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); + destroy_workqueue(hdev->cq_wq); + hl_asid_fini(hdev); if (hdev->asic_funcs->early_fini) @@ -277,7 +290,7 @@ int hl_device_resume(struct hl_device *hdev) */ int hl_device_init(struct hl_device *hdev, struct class *hclass) { - int rc; + int i, rc, cq_ready_cnt; /* Create device */ rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops); @@ -298,11 +311,48 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (rc) goto early_fini; + /* + * Initialize the H/W queues. Must be done before hw_init, because + * there the addresses of the kernel queue are being written to the + * registers of the device + */ + rc = hl_hw_queues_create(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize kernel queues\n"); + goto sw_fini; + } + + /* + * Initialize the completion queues. Must be done before hw_init, + * because there the addresses of the completion queues are being + * passed as arguments to request_irq + */ + hdev->completion_queue = + kcalloc(hdev->asic_prop.completion_queues_count, + sizeof(*hdev->completion_queue), GFP_KERNEL); + + if (!hdev->completion_queue) { + dev_err(hdev->dev, "failed to allocate completion queues\n"); + rc = -ENOMEM; + goto hw_queues_destroy; + } + + for (i = 0, cq_ready_cnt = 0; + i < hdev->asic_prop.completion_queues_count; + i++, cq_ready_cnt++) { + rc = hl_cq_init(hdev, &hdev->completion_queue[i], i); + if (rc) { + dev_err(hdev->dev, + "failed to initialize completion queue\n"); + goto cq_fini; + } + } + /* Allocate the kernel context */ hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); if (!hdev->kernel_ctx) { rc = -ENOMEM; - goto sw_fini; + goto cq_fini; } hdev->user_ctx = NULL; @@ -328,6 +378,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hdev->disabled = false; + /* Check that the communication with the device is working */ + rc = hdev->asic_funcs->test_queues(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to detect if device is alive\n"); + rc = 0; + goto out_disabled; + } + dev_notice(hdev->dev, "Successfully added device to habanalabs driver\n"); @@ -339,6 +397,12 @@ release_ctx: "kernel ctx is still alive on initialization failure\n"); free_ctx: kfree(hdev->kernel_ctx); +cq_fini: + for (i = 0 ; i < cq_ready_cnt ; i++) + hl_cq_fini(hdev, &hdev->completion_queue[i]); + kfree(hdev->completion_queue); +hw_queues_destroy: + hl_hw_queues_destroy(hdev); sw_fini: hdev->asic_funcs->sw_fini(hdev); early_fini: @@ -368,6 +432,7 @@ out_disabled: */ void hl_device_fini(struct hl_device *hdev) { + int i; dev_info(hdev->dev, "Removing device\n"); /* Mark device as disabled */ @@ -382,6 +447,12 @@ void hl_device_fini(struct hl_device *hdev) /* Reset the H/W. It will be in idle state after this returns */ hdev->asic_funcs->hw_fini(hdev, true); + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + hl_cq_fini(hdev, &hdev->completion_queue[i]); + kfree(hdev->completion_queue); + + hl_hw_queues_destroy(hdev); + /* Call ASIC S/W finalize function */ hdev->asic_funcs->sw_fini(hdev); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 1fa5b91fd703..a45a183d4e5c 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -90,6 +90,26 @@ static void goya_get_fixed_properties(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; + int i; + + for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) { + prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; + prop->hw_queues_props[i].kmd_only = 0; + } + + for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) { + prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; + prop->hw_queues_props[i].kmd_only = 1; + } + + for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES + + NUMBER_OF_INT_HW_QUEUES; i++) { + prop->hw_queues_props[i].type = QUEUE_TYPE_INT; + prop->hw_queues_props[i].kmd_only = 0; + } + + for (; i < HL_MAX_QUEUES; i++) + prop->hw_queues_props[i].type = QUEUE_TYPE_NA; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; @@ -118,6 +138,18 @@ static void goya_get_fixed_properties(struct hl_device *hdev) prop->high_pll = PLL_HIGH_DEFAULT; } +int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode) +{ + struct armcp_packet pkt; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = opcode << ARMCP_PKT_CTL_OPCODE_SHIFT; + + return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, + sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); +} + /* * goya_pci_bars_map - Map PCI BARS of Goya device * @@ -510,6 +542,8 @@ static int goya_sw_init(struct hl_device *hdev) if (!goya) return -ENOMEM; + goya->test_cpu_queue = goya_test_cpu_queue; + /* according to goya_init_iatu */ goya->ddr_bar_cur_addr = DRAM_PHYS_BASE; hdev->asic_specific = goya; @@ -596,6 +630,311 @@ int goya_sw_fini(struct hl_device *hdev) return 0; } +static void goya_init_dma_qman(struct hl_device *hdev, int dma_id, + dma_addr_t bus_address) +{ + struct goya_device *goya = hdev->asic_specific; + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 gic_base_lo, gic_base_hi; + u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI); + + mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + + gic_base_lo = + lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + gic_base_hi = + upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + + WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address)); + WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address)); + + WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH)); + WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0); + WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0); + + WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo); + WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi); + WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo); + WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi); + WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo); + WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi); + WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off, + GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id); + + /* PQ has buffer of 2 cache lines, while CQ has 8 lines */ + WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002); + WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008); + + if (dma_id == 0) + WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED); + else + if (goya->hw_cap_initialized & HW_CAP_MMU) + WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, + QMAN_DMA_PARTLY_TRUSTED); + else + WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, + QMAN_DMA_FULLY_TRUSTED); + + WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN); + WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE); +} + +static void goya_init_dma_ch(struct hl_device *hdev, int dma_id) +{ + u32 gic_base_lo, gic_base_hi; + u64 sob_addr; + u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1); + + gic_base_lo = + lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + gic_base_hi = + upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + + WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo); + WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi); + WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off, + GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id); + + if (dma_id) { + sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 + + (dma_id - 1) * 4; + WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, + lower_32_bits(sob_addr)); + WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, + upper_32_bits(sob_addr)); + WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001); + } +} + +/* + * goya_init_dma_qmans - Initialize QMAN DMA registers + * + * @hdev: pointer to hl_device structure + * + * Initialize the H/W registers of the QMAN DMA channels + * + */ +static void goya_init_dma_qmans(struct hl_device *hdev) +{ + struct goya_device *goya = hdev->asic_specific; + struct hl_hw_queue *q; + dma_addr_t bus_address; + int i; + + if (goya->hw_cap_initialized & HW_CAP_DMA) + return; + + q = &hdev->kernel_queues[0]; + + for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) { + bus_address = q->bus_address + + hdev->asic_prop.host_phys_base_address; + + goya_init_dma_qman(hdev, i, bus_address); + goya_init_dma_ch(hdev, i); + } + + goya->hw_cap_initialized |= HW_CAP_DMA; +} + +/* + * goya_disable_external_queues - Disable external queues + * + * @hdev: pointer to hl_device structure + * + */ +static void goya_disable_external_queues(struct hl_device *hdev) +{ + WREG32(mmDMA_QM_0_GLBL_CFG0, 0); + WREG32(mmDMA_QM_1_GLBL_CFG0, 0); + WREG32(mmDMA_QM_2_GLBL_CFG0, 0); + WREG32(mmDMA_QM_3_GLBL_CFG0, 0); + WREG32(mmDMA_QM_4_GLBL_CFG0, 0); +} + +static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg, + u32 cp_sts_reg, u32 glbl_sts0_reg) +{ + int rc; + u32 status; + + /* use the values of TPC0 as they are all the same*/ + + WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); + + status = RREG32(cp_sts_reg); + if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) { + rc = hl_poll_timeout( + hdev, + cp_sts_reg, + status, + !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK), + 1000, + QMAN_FENCE_TIMEOUT_USEC); + + /* if QMAN is stuck in fence no need to check for stop */ + if (rc) + return 0; + } + + rc = hl_poll_timeout( + hdev, + glbl_sts0_reg, + status, + (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK), + 1000, + QMAN_STOP_TIMEOUT_USEC); + + if (rc) { + dev_err(hdev->dev, + "Timeout while waiting for QMAN to stop\n"); + return -EINVAL; + } + + return 0; +} + +/* + * goya_stop_external_queues - Stop external queues + * + * @hdev: pointer to hl_device structure + * + * Returns 0 on success + * + */ +static int goya_stop_external_queues(struct hl_device *hdev) +{ + int rc, retval = 0; + + rc = goya_stop_queue(hdev, + mmDMA_QM_0_GLBL_CFG1, + mmDMA_QM_0_CP_STS, + mmDMA_QM_0_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop DMA QMAN 0\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmDMA_QM_1_GLBL_CFG1, + mmDMA_QM_1_CP_STS, + mmDMA_QM_1_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop DMA QMAN 1\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmDMA_QM_2_GLBL_CFG1, + mmDMA_QM_2_CP_STS, + mmDMA_QM_2_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop DMA QMAN 2\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmDMA_QM_3_GLBL_CFG1, + mmDMA_QM_3_CP_STS, + mmDMA_QM_3_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop DMA QMAN 3\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmDMA_QM_4_GLBL_CFG1, + mmDMA_QM_4_CP_STS, + mmDMA_QM_4_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop DMA QMAN 4\n"); + retval = -EIO; + } + + return retval; +} + +static void goya_resume_external_queues(struct hl_device *hdev) +{ + WREG32(mmDMA_QM_0_GLBL_CFG1, 0); + WREG32(mmDMA_QM_1_GLBL_CFG1, 0); + WREG32(mmDMA_QM_2_GLBL_CFG1, 0); + WREG32(mmDMA_QM_3_GLBL_CFG1, 0); + WREG32(mmDMA_QM_4_GLBL_CFG1, 0); +} + +/* + * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU + * + * @hdev: pointer to hl_device structure + * + * Returns 0 on success + * + */ +int goya_init_cpu_queues(struct hl_device *hdev) +{ + struct goya_device *goya = hdev->asic_specific; + dma_addr_t bus_address; + u32 status; + struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ]; + int err; + + if (!hdev->cpu_queues_enable) + return 0; + + if (goya->hw_cap_initialized & HW_CAP_CPU_Q) + return 0; + + bus_address = cpu_pq->bus_address + + hdev->asic_prop.host_phys_base_address; + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0, lower_32_bits(bus_address)); + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1, upper_32_bits(bus_address)); + + bus_address = hdev->cpu_accessible_dma_address + + hdev->asic_prop.host_phys_base_address; + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, lower_32_bits(bus_address)); + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, upper_32_bits(bus_address)); + + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES); + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, CPU_ACCESSIBLE_MEM_SIZE); + + /* Used for EQ CI */ + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0); + + WREG32(mmCPU_IF_PF_PQ_PI, 0); + + WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP); + + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, + GOYA_ASYNC_EVENT_ID_PI_UPDATE); + + err = hl_poll_timeout( + hdev, + mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, + status, + (status == PQ_INIT_STATUS_READY_FOR_HOST), + 1000, + GOYA_CPU_TIMEOUT_USEC); + + if (err) { + dev_err(hdev->dev, + "Failed to communicate with ARM CPU (ArmCP timeout)\n"); + return -EIO; + } + + goya->hw_cap_initialized |= HW_CAP_CPU_Q; + return 0; +} + static void goya_set_pll_refclk(struct hl_device *hdev) { WREG32(mmCPU_PLL_DIV_SEL_0, 0x0); @@ -1023,6 +1362,506 @@ static void goya_init_golden_registers(struct hl_device *hdev) goya->hw_cap_initialized |= HW_CAP_GOLDEN; } +static void goya_init_mme_qman(struct hl_device *hdev) +{ + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 gic_base_lo, gic_base_hi; + u64 qman_base_addr; + + mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + + gic_base_lo = + lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + gic_base_hi = + upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + + qman_base_addr = hdev->asic_prop.sram_base_address + + MME_QMAN_BASE_OFFSET; + + WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr)); + WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr)); + WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH)); + WREG32(mmMME_QM_PQ_PI, 0); + WREG32(mmMME_QM_PQ_CI, 0); + WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0); + WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4); + WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8); + WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC); + + WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo); + WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi); + WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo); + WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi); + + /* QMAN CQ has 8 cache lines */ + WREG32(mmMME_QM_CQ_CFG1, 0x00080008); + + WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo); + WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi); + + WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM); + + WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN); + + WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT); + + WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE); +} + +static void goya_init_mme_cmdq(struct hl_device *hdev) +{ + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 gic_base_lo, gic_base_hi; + u64 qman_base_addr; + + mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + + gic_base_lo = + lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + gic_base_hi = + upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + + qman_base_addr = hdev->asic_prop.sram_base_address + + MME_QMAN_BASE_OFFSET; + + WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo); + WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi); + WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo); + WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi); + + /* CMDQ CQ has 20 cache lines */ + WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014); + + WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo); + WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi); + + WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ); + + WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN); + + WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT); + + WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE); +} + +static void goya_init_mme_qmans(struct hl_device *hdev) +{ + struct goya_device *goya = hdev->asic_specific; + u32 so_base_lo, so_base_hi; + + if (goya->hw_cap_initialized & HW_CAP_MME) + return; + + so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + + WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo); + WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi); + + goya_init_mme_qman(hdev); + goya_init_mme_cmdq(hdev); + + goya->hw_cap_initialized |= HW_CAP_MME; +} + +static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id) +{ + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 gic_base_lo, gic_base_hi; + u64 qman_base_addr; + u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI); + + mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + + gic_base_lo = + lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + gic_base_hi = + upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + + qman_base_addr = hdev->asic_prop.sram_base_address + base_off; + + WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr)); + WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr)); + WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH)); + WREG32(mmTPC0_QM_PQ_PI + reg_off, 0); + WREG32(mmTPC0_QM_PQ_CI + reg_off, 0); + WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0); + WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4); + WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8); + WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC); + + WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo); + WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi); + WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo); + WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi); + + WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008); + + WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo); + WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi); + + WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off, + GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id); + + WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN); + + WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT); + + WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE); +} + +static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id) +{ + u32 mtr_base_lo, mtr_base_hi; + u32 so_base_lo, so_base_hi; + u32 gic_base_lo, gic_base_hi; + u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1); + + mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0); + so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + + gic_base_lo = + lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + gic_base_hi = + upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR); + + WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo); + WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi); + WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo); + WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi); + + WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014); + + WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo); + WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi); + + WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off, + GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id); + + WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN); + + WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT); + + WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE); +} + +static void goya_init_tpc_qmans(struct hl_device *hdev) +{ + struct goya_device *goya = hdev->asic_specific; + u32 so_base_lo, so_base_hi; + u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW - + mmTPC0_CFG_SM_BASE_ADDRESS_LOW; + int i; + + if (goya->hw_cap_initialized & HW_CAP_TPC) + return; + + so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0); + + for (i = 0 ; i < TPC_MAX_NUM ; i++) { + WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off, + so_base_lo); + WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off, + so_base_hi); + } + + goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0); + goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1); + goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2); + goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3); + goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4); + goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5); + goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6); + goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7); + + for (i = 0 ; i < TPC_MAX_NUM ; i++) + goya_init_tpc_cmdq(hdev, i); + + goya->hw_cap_initialized |= HW_CAP_TPC; +} + +/* + * goya_disable_internal_queues - Disable internal queues + * + * @hdev: pointer to hl_device structure + * + */ +static void goya_disable_internal_queues(struct hl_device *hdev) +{ + WREG32(mmMME_QM_GLBL_CFG0, 0); + WREG32(mmMME_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC0_QM_GLBL_CFG0, 0); + WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC1_QM_GLBL_CFG0, 0); + WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC2_QM_GLBL_CFG0, 0); + WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC3_QM_GLBL_CFG0, 0); + WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC4_QM_GLBL_CFG0, 0); + WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC5_QM_GLBL_CFG0, 0); + WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC6_QM_GLBL_CFG0, 0); + WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0); + + WREG32(mmTPC7_QM_GLBL_CFG0, 0); + WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0); +} + +/* + * goya_stop_internal_queues - Stop internal queues + * + * @hdev: pointer to hl_device structure + * + * Returns 0 on success + * + */ +static int goya_stop_internal_queues(struct hl_device *hdev) +{ + int rc, retval = 0; + + /* + * Each queue (QMAN) is a separate H/W logic. That means that each + * QMAN can be stopped independently and failure to stop one does NOT + * mandate we should not try to stop other QMANs + */ + + rc = goya_stop_queue(hdev, + mmMME_QM_GLBL_CFG1, + mmMME_QM_CP_STS, + mmMME_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop MME QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmMME_CMDQ_GLBL_CFG1, + mmMME_CMDQ_CP_STS, + mmMME_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop MME CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC0_QM_GLBL_CFG1, + mmTPC0_QM_CP_STS, + mmTPC0_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC0_CMDQ_GLBL_CFG1, + mmTPC0_CMDQ_CP_STS, + mmTPC0_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC1_QM_GLBL_CFG1, + mmTPC1_QM_CP_STS, + mmTPC1_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC1_CMDQ_GLBL_CFG1, + mmTPC1_CMDQ_CP_STS, + mmTPC1_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC2_QM_GLBL_CFG1, + mmTPC2_QM_CP_STS, + mmTPC2_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC2_CMDQ_GLBL_CFG1, + mmTPC2_CMDQ_CP_STS, + mmTPC2_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC3_QM_GLBL_CFG1, + mmTPC3_QM_CP_STS, + mmTPC3_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC3_CMDQ_GLBL_CFG1, + mmTPC3_CMDQ_CP_STS, + mmTPC3_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC4_QM_GLBL_CFG1, + mmTPC4_QM_CP_STS, + mmTPC4_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC4_CMDQ_GLBL_CFG1, + mmTPC4_CMDQ_CP_STS, + mmTPC4_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC5_QM_GLBL_CFG1, + mmTPC5_QM_CP_STS, + mmTPC5_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC5_CMDQ_GLBL_CFG1, + mmTPC5_CMDQ_CP_STS, + mmTPC5_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC6_QM_GLBL_CFG1, + mmTPC6_QM_CP_STS, + mmTPC6_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC6_CMDQ_GLBL_CFG1, + mmTPC6_CMDQ_CP_STS, + mmTPC6_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC7_QM_GLBL_CFG1, + mmTPC7_QM_CP_STS, + mmTPC7_QM_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n"); + retval = -EIO; + } + + rc = goya_stop_queue(hdev, + mmTPC7_CMDQ_GLBL_CFG1, + mmTPC7_CMDQ_CP_STS, + mmTPC7_CMDQ_GLBL_STS0); + + if (rc) { + dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n"); + retval = -EIO; + } + + return retval; +} + +static void goya_resume_internal_queues(struct hl_device *hdev) +{ + WREG32(mmMME_QM_GLBL_CFG1, 0); + WREG32(mmMME_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC0_QM_GLBL_CFG1, 0); + WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC1_QM_GLBL_CFG1, 0); + WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC2_QM_GLBL_CFG1, 0); + WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC3_QM_GLBL_CFG1, 0); + WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC4_QM_GLBL_CFG1, 0); + WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC5_QM_GLBL_CFG1, 0); + WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC6_QM_GLBL_CFG1, 0); + WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0); + + WREG32(mmTPC7_QM_GLBL_CFG1, 0); + WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0); +} + /* * goya_push_fw_to_device - Push FW code to device @@ -1344,6 +2183,19 @@ static int goya_hw_init(struct hl_device *hdev) goya_init_security(hdev); + goya_init_dma_qmans(hdev); + + goya_init_mme_qmans(hdev); + + goya_init_tpc_qmans(hdev); + + rc = goya_init_cpu_queues(hdev); + if (rc) { + dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", + rc); + goto disable_queues; + } + /* CPU initialization is finished, we can now move to 48 bit DMA mask */ rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(48)); |