diff options
-rw-r--r-- | drivers/misc/habanalabs/Makefile | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/Makefile | 4 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 7360 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudiP.h | 251 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs.h | 24 | ||||
-rw-r--r-- | drivers/misc/habanalabs/habanalabs_drv.c | 7 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map.h | 687 |
7 files changed, 8335 insertions(+), 1 deletion(-)
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index 482f6227dbba..421ebd903069 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -13,3 +13,6 @@ habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o include $(src)/goya/Makefile habanalabs-y += $(HL_GOYA_FILES) + +include $(src)/gaudi/Makefile +habanalabs-y += $(HL_GAUDI_FILES) diff --git a/drivers/misc/habanalabs/gaudi/Makefile b/drivers/misc/habanalabs/gaudi/Makefile new file mode 100644 index 000000000000..b30b523881a0 --- /dev/null +++ b/drivers/misc/habanalabs/gaudi/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src) + +HL_GAUDI_FILES := gaudi/gaudi.o
\ No newline at end of file diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c new file mode 100644 index 000000000000..8f3591e23a3c --- /dev/null +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -0,0 +1,7360 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2016-2020 HabanaLabs, Ltd. + * All Rights Reserved. + */ + +#include "gaudiP.h" +#include "include/hw_ip/mmu/mmu_general.h" +#include "include/hw_ip/mmu/mmu_v1_1.h" +#include "include/gaudi/gaudi_masks.h" +#include "include/gaudi/gaudi_fw_if.h" +#include "include/gaudi/gaudi_reg_map.h" +#include "include/gaudi/gaudi_async_ids_map.h" + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/firmware.h> +#include <linux/hwmon.h> +#include <linux/genalloc.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/iommu.h> +#include <linux/seq_file.h> + +/* + * Gaudi security scheme: + * + * 1. Host is protected by: + * - Range registers + * - MMU + * + * 2. DDR is protected by: + * - Range registers (protect the first 512MB) + * + * 3. Configuration is protected by: + * - Range registers + * - Protection bits + * + * MMU is always enabled. + * + * QMAN DMA channels 0,1,5 (PCI DMAN): + * - DMA is not secured. + * - PQ and CQ are secured. + * - CP is secured: The driver needs to parse CB but WREG should be allowed + * because of TDMA (tensor DMA). Hence, WREG is always not + * secured. + * + * When the driver needs to use DMA it will check that Gaudi is idle, set DMA + * channel 0 to be secured, execute the DMA and change it back to not secured. + * Currently, the driver doesn't use the DMA while there are compute jobs + * running. 
+ * + * The current use cases for the driver to use the DMA are: + * - Clear SRAM on context switch (happens on context switch when device is + * idle) + * - MMU page tables area clear (happens on init) + * + * QMAN DMA 2-4,6,7, TPC, MME, NIC: + * PQ is secured and is located on the Host (HBM CON TPC3 bug) + * CQ, CP and the engine are not secured + * + */ + +#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb" +#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" +#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" + +#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ + +#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */ +#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */ +#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */ +#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */ + +#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */ +#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */ +#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */ +#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */ +#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) +#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) +#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) +#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ + +#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 + +#define GAUDI_MAX_STRING_LEN 20 + +#define GAUDI_CB_POOL_CB_CNT 512 +#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */ + +#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3 + +#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20 + +#define GAUDI_NUM_OF_QM_ERR_CAUSE 16 + +#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 + +#define GAUDI_ARB_WDT_TIMEOUT 0x400000 + +static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { + "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", + "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", + "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3", + "gaudi cpu eq" +}; + +static const u8 
gaudi_dma_assignment[GAUDI_DMA_MAX] = { + [GAUDI_PCI_DMA_1] = 0, + [GAUDI_PCI_DMA_2] = 1, + [GAUDI_PCI_DMA_3] = 5, + [GAUDI_HBM_DMA_1] = 2, + [GAUDI_HBM_DMA_2] = 3, + [GAUDI_HBM_DMA_3] = 4, + [GAUDI_HBM_DMA_4] = 6, + [GAUDI_HBM_DMA_5] = 7 +}; + +static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { + [0] = GAUDI_QUEUE_ID_DMA_0_0, + [1] = GAUDI_QUEUE_ID_DMA_0_1, + [2] = GAUDI_QUEUE_ID_DMA_0_2, + [3] = GAUDI_QUEUE_ID_DMA_0_3, + [4] = GAUDI_QUEUE_ID_DMA_1_0, + [5] = GAUDI_QUEUE_ID_DMA_1_1, + [6] = GAUDI_QUEUE_ID_DMA_1_2, + [7] = GAUDI_QUEUE_ID_DMA_1_3, + [8] = GAUDI_QUEUE_ID_DMA_5_0, + [9] = GAUDI_QUEUE_ID_DMA_5_1, + [10] = GAUDI_QUEUE_ID_DMA_5_2, + [11] = GAUDI_QUEUE_ID_DMA_5_3 +}; + +static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { + [PACKET_WREG_32] = sizeof(struct packet_wreg32), + [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk), + [PACKET_MSG_LONG] = sizeof(struct packet_msg_long), + [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short), + [PACKET_CP_DMA] = sizeof(struct packet_cp_dma), + [PACKET_REPEAT] = sizeof(struct packet_repeat), + [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot), + [PACKET_FENCE] = sizeof(struct packet_fence), + [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma), + [PACKET_NOP] = sizeof(struct packet_nop), + [PACKET_STOP] = sizeof(struct packet_stop), + [PACKET_ARB_POINT] = sizeof(struct packet_arb_point), + [PACKET_WAIT] = sizeof(struct packet_wait), + [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) +}; + +static const u32 gaudi_all_events[] = { + GAUDI_EVENT_PCIE_CORE_SERR, + GAUDI_EVENT_PCIE_CORE_DERR, + GAUDI_EVENT_PCIE_IF_SERR, + GAUDI_EVENT_PCIE_IF_DERR, + GAUDI_EVENT_PCIE_PHY_SERR, + GAUDI_EVENT_PCIE_PHY_DERR, + GAUDI_EVENT_TPC0_SERR, + GAUDI_EVENT_TPC1_SERR, + GAUDI_EVENT_TPC2_SERR, + GAUDI_EVENT_TPC3_SERR, + GAUDI_EVENT_TPC4_SERR, + GAUDI_EVENT_TPC5_SERR, + GAUDI_EVENT_TPC6_SERR, + GAUDI_EVENT_TPC7_SERR, + GAUDI_EVENT_TPC0_DERR, + GAUDI_EVENT_TPC1_DERR, + GAUDI_EVENT_TPC2_DERR, + 
GAUDI_EVENT_TPC3_DERR, + GAUDI_EVENT_TPC4_DERR, + GAUDI_EVENT_TPC5_DERR, + GAUDI_EVENT_TPC6_DERR, + GAUDI_EVENT_TPC7_DERR, + GAUDI_EVENT_MME0_ACC_SERR, + GAUDI_EVENT_MME0_ACC_DERR, + GAUDI_EVENT_MME0_SBAB_SERR, + GAUDI_EVENT_MME0_SBAB_DERR, + GAUDI_EVENT_MME1_ACC_SERR, + GAUDI_EVENT_MME1_ACC_DERR, + GAUDI_EVENT_MME1_SBAB_SERR, + GAUDI_EVENT_MME1_SBAB_DERR, + GAUDI_EVENT_MME2_ACC_SERR, + GAUDI_EVENT_MME2_ACC_DERR, + GAUDI_EVENT_MME2_SBAB_SERR, + GAUDI_EVENT_MME2_SBAB_DERR, + GAUDI_EVENT_MME3_ACC_SERR, + GAUDI_EVENT_MME3_ACC_DERR, + GAUDI_EVENT_MME3_SBAB_SERR, + GAUDI_EVENT_MME3_SBAB_DERR, + GAUDI_EVENT_DMA0_SERR_ECC, + GAUDI_EVENT_DMA1_SERR_ECC, + GAUDI_EVENT_DMA2_SERR_ECC, + GAUDI_EVENT_DMA3_SERR_ECC, + GAUDI_EVENT_DMA4_SERR_ECC, + GAUDI_EVENT_DMA5_SERR_ECC, + GAUDI_EVENT_DMA6_SERR_ECC, + GAUDI_EVENT_DMA7_SERR_ECC, + GAUDI_EVENT_DMA0_DERR_ECC, + GAUDI_EVENT_DMA1_DERR_ECC, + GAUDI_EVENT_DMA2_DERR_ECC, + GAUDI_EVENT_DMA3_DERR_ECC, + GAUDI_EVENT_DMA4_DERR_ECC, + GAUDI_EVENT_DMA5_DERR_ECC, + GAUDI_EVENT_DMA6_DERR_ECC, + GAUDI_EVENT_DMA7_DERR_ECC, + GAUDI_EVENT_CPU_IF_ECC_SERR, + GAUDI_EVENT_CPU_IF_ECC_DERR, + GAUDI_EVENT_PSOC_MEM_SERR, + GAUDI_EVENT_PSOC_CORESIGHT_SERR, + GAUDI_EVENT_PSOC_MEM_DERR, + GAUDI_EVENT_PSOC_CORESIGHT_DERR, + GAUDI_EVENT_SRAM0_SERR, + GAUDI_EVENT_SRAM1_SERR, + GAUDI_EVENT_SRAM2_SERR, + GAUDI_EVENT_SRAM3_SERR, + GAUDI_EVENT_SRAM7_SERR, + GAUDI_EVENT_SRAM6_SERR, + GAUDI_EVENT_SRAM5_SERR, + GAUDI_EVENT_SRAM4_SERR, + GAUDI_EVENT_SRAM8_SERR, + GAUDI_EVENT_SRAM9_SERR, + GAUDI_EVENT_SRAM10_SERR, + GAUDI_EVENT_SRAM11_SERR, + GAUDI_EVENT_SRAM15_SERR, + GAUDI_EVENT_SRAM14_SERR, + GAUDI_EVENT_SRAM13_SERR, + GAUDI_EVENT_SRAM12_SERR, + GAUDI_EVENT_SRAM16_SERR, + GAUDI_EVENT_SRAM17_SERR, + GAUDI_EVENT_SRAM18_SERR, + GAUDI_EVENT_SRAM19_SERR, + GAUDI_EVENT_SRAM23_SERR, + GAUDI_EVENT_SRAM22_SERR, + GAUDI_EVENT_SRAM21_SERR, + GAUDI_EVENT_SRAM20_SERR, + GAUDI_EVENT_SRAM24_SERR, + GAUDI_EVENT_SRAM25_SERR, + GAUDI_EVENT_SRAM26_SERR, + GAUDI_EVENT_SRAM27_SERR, + 
GAUDI_EVENT_SRAM31_SERR, + GAUDI_EVENT_SRAM30_SERR, + GAUDI_EVENT_SRAM29_SERR, + GAUDI_EVENT_SRAM28_SERR, + GAUDI_EVENT_SRAM0_DERR, + GAUDI_EVENT_SRAM1_DERR, + GAUDI_EVENT_SRAM2_DERR, + GAUDI_EVENT_SRAM3_DERR, + GAUDI_EVENT_SRAM7_DERR, + GAUDI_EVENT_SRAM6_DERR, + GAUDI_EVENT_SRAM5_DERR, + GAUDI_EVENT_SRAM4_DERR, + GAUDI_EVENT_SRAM8_DERR, + GAUDI_EVENT_SRAM9_DERR, + GAUDI_EVENT_SRAM10_DERR, + GAUDI_EVENT_SRAM11_DERR, + GAUDI_EVENT_SRAM15_DERR, + GAUDI_EVENT_SRAM14_DERR, + GAUDI_EVENT_SRAM13_DERR, + GAUDI_EVENT_SRAM12_DERR, + GAUDI_EVENT_SRAM16_DERR, + GAUDI_EVENT_SRAM17_DERR, + GAUDI_EVENT_SRAM18_DERR, + GAUDI_EVENT_SRAM19_DERR, + GAUDI_EVENT_SRAM23_DERR, + GAUDI_EVENT_SRAM22_DERR, + GAUDI_EVENT_SRAM21_DERR, + GAUDI_EVENT_SRAM20_DERR, + GAUDI_EVENT_SRAM24_DERR, + GAUDI_EVENT_SRAM25_DERR, + GAUDI_EVENT_SRAM26_DERR, + GAUDI_EVENT_SRAM27_DERR, + GAUDI_EVENT_SRAM31_DERR, + GAUDI_EVENT_SRAM30_DERR, + GAUDI_EVENT_SRAM29_DERR, + GAUDI_EVENT_SRAM28_DERR, + GAUDI_EVENT_NIC0_SERR, + GAUDI_EVENT_NIC1_SERR, + GAUDI_EVENT_NIC2_SERR, + GAUDI_EVENT_NIC3_SERR, + GAUDI_EVENT_NIC4_SERR, + GAUDI_EVENT_NIC0_DERR, + GAUDI_EVENT_NIC1_DERR, + GAUDI_EVENT_NIC2_DERR, + GAUDI_EVENT_NIC3_DERR, + GAUDI_EVENT_NIC4_DERR, + GAUDI_EVENT_DMA_IF0_SERR, + GAUDI_EVENT_DMA_IF1_SERR, + GAUDI_EVENT_DMA_IF2_SERR, + GAUDI_EVENT_DMA_IF3_SERR, + GAUDI_EVENT_DMA_IF0_DERR, + GAUDI_EVENT_DMA_IF1_DERR, + GAUDI_EVENT_DMA_IF2_DERR, + GAUDI_EVENT_DMA_IF3_DERR, + GAUDI_EVENT_GIC500, + GAUDI_EVENT_HBM_0_SERR, + GAUDI_EVENT_HBM_1_SERR, + GAUDI_EVENT_HBM_2_SERR, + GAUDI_EVENT_HBM_3_SERR, + GAUDI_EVENT_HBM_0_DERR, + GAUDI_EVENT_HBM_1_DERR, + GAUDI_EVENT_HBM_2_DERR, + GAUDI_EVENT_HBM_3_DERR, + GAUDI_EVENT_MMU_SERR, + GAUDI_EVENT_MMU_DERR, + GAUDI_EVENT_PCIE_DEC, + GAUDI_EVENT_TPC0_DEC, + GAUDI_EVENT_TPC1_DEC, + GAUDI_EVENT_TPC2_DEC, + GAUDI_EVENT_TPC3_DEC, + GAUDI_EVENT_TPC4_DEC, + GAUDI_EVENT_TPC5_DEC, + GAUDI_EVENT_TPC6_DEC, + GAUDI_EVENT_TPC7_DEC, + GAUDI_EVENT_AXI_ECC, + GAUDI_EVENT_L2_RAM_ECC, + 
GAUDI_EVENT_MME0_WBC_RSP, + GAUDI_EVENT_MME0_SBAB0_RSP, + GAUDI_EVENT_MME1_WBC_RSP, + GAUDI_EVENT_MME1_SBAB0_RSP, + GAUDI_EVENT_MME2_WBC_RSP, + GAUDI_EVENT_MME2_SBAB0_RSP, + GAUDI_EVENT_MME3_WBC_RSP, + GAUDI_EVENT_MME3_SBAB0_RSP, + GAUDI_EVENT_PLL0, + GAUDI_EVENT_PLL1, + GAUDI_EVENT_PLL2, + GAUDI_EVENT_PLL3, + GAUDI_EVENT_PLL4, + GAUDI_EVENT_PLL5, + GAUDI_EVENT_PLL6, + GAUDI_EVENT_PLL7, + GAUDI_EVENT_PLL8, + GAUDI_EVENT_PLL9, + GAUDI_EVENT_PLL10, + GAUDI_EVENT_PLL11, + GAUDI_EVENT_PLL12, + GAUDI_EVENT_PLL13, + GAUDI_EVENT_PLL14, + GAUDI_EVENT_PLL15, + GAUDI_EVENT_PLL16, + GAUDI_EVENT_PLL17, + GAUDI_EVENT_CPU_AXI_SPLITTER, + GAUDI_EVENT_PSOC_AXI_DEC, + GAUDI_EVENT_PSOC_PRSTN_FALL, + GAUDI_EVENT_TPC0_BMON_SPMU, + GAUDI_EVENT_TPC0_KRN_ERR, + GAUDI_EVENT_TPC1_BMON_SPMU, + GAUDI_EVENT_TPC1_KRN_ERR, + GAUDI_EVENT_TPC2_BMON_SPMU, + GAUDI_EVENT_TPC2_KRN_ERR, + GAUDI_EVENT_TPC3_BMON_SPMU, + GAUDI_EVENT_TPC3_KRN_ERR, + GAUDI_EVENT_TPC4_BMON_SPMU, + GAUDI_EVENT_TPC4_KRN_ERR, + GAUDI_EVENT_TPC5_BMON_SPMU, + GAUDI_EVENT_TPC5_KRN_ERR, + GAUDI_EVENT_TPC6_BMON_SPMU, + GAUDI_EVENT_TPC6_KRN_ERR, + GAUDI_EVENT_TPC7_BMON_SPMU, + GAUDI_EVENT_TPC7_KRN_ERR, + GAUDI_EVENT_MMU_PAGE_FAULT, + GAUDI_EVENT_MMU_WR_PERM, + GAUDI_EVENT_DMA_BM_CH0, + GAUDI_EVENT_DMA_BM_CH1, + GAUDI_EVENT_DMA_BM_CH2, + GAUDI_EVENT_DMA_BM_CH3, + GAUDI_EVENT_DMA_BM_CH4, + GAUDI_EVENT_DMA_BM_CH5, + GAUDI_EVENT_DMA_BM_CH6, + GAUDI_EVENT_DMA_BM_CH7, + GAUDI_EVENT_HBM0_SPI_0, + GAUDI_EVENT_HBM0_SPI_1, + GAUDI_EVENT_HBM1_SPI_0, + GAUDI_EVENT_HBM1_SPI_1, + GAUDI_EVENT_HBM2_SPI_0, + GAUDI_EVENT_HBM2_SPI_1, + GAUDI_EVENT_HBM3_SPI_0, + GAUDI_EVENT_HBM3_SPI_1, + GAUDI_EVENT_RAZWI_OR_ADC, + GAUDI_EVENT_TPC0_QM, + GAUDI_EVENT_TPC1_QM, + GAUDI_EVENT_TPC2_QM, + GAUDI_EVENT_TPC3_QM, + GAUDI_EVENT_TPC4_QM, + GAUDI_EVENT_TPC5_QM, + GAUDI_EVENT_TPC6_QM, + GAUDI_EVENT_TPC7_QM, + GAUDI_EVENT_MME0_QM, + GAUDI_EVENT_MME2_QM, + GAUDI_EVENT_DMA0_QM, + GAUDI_EVENT_DMA1_QM, + GAUDI_EVENT_DMA2_QM, + GAUDI_EVENT_DMA3_QM, + GAUDI_EVENT_DMA4_QM, 
+ GAUDI_EVENT_DMA5_QM, + GAUDI_EVENT_DMA6_QM, + GAUDI_EVENT_DMA7_QM, + GAUDI_EVENT_NIC0_QM0, + GAUDI_EVENT_NIC0_QM1, + GAUDI_EVENT_NIC1_QM0, + GAUDI_EVENT_NIC1_QM1, + GAUDI_EVENT_NIC2_QM0, + GAUDI_EVENT_NIC2_QM1, + GAUDI_EVENT_NIC3_QM0, + GAUDI_EVENT_NIC3_QM1, + GAUDI_EVENT_NIC4_QM0, + GAUDI_EVENT_NIC4_QM1, + GAUDI_EVENT_DMA0_CORE, + GAUDI_EVENT_DMA1_CORE, + GAUDI_EVENT_DMA2_CORE, + GAUDI_EVENT_DMA3_CORE, + GAUDI_EVENT_DMA4_CORE, + GAUDI_EVENT_DMA5_CORE, + GAUDI_EVENT_DMA6_CORE, + GAUDI_EVENT_DMA7_CORE, + GAUDI_EVENT_FIX_POWER_ENV_S, + GAUDI_EVENT_FIX_POWER_ENV_E, + GAUDI_EVENT_FIX_THERMAL_ENV_S, + GAUDI_EVENT_FIX_THERMAL_ENV_E, + GAUDI_EVENT_RAZWI_OR_ADC_SW +}; + +static const char * const +gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = { + "tpc_address_exceed_slm", + "tpc_div_by_0", + "tpc_spu_mac_overflow", + "tpc_spu_addsub_overflow", + "tpc_spu_abs_overflow", + "tpc_spu_fp_dst_nan_inf", + "tpc_spu_fp_dst_denorm", + "tpc_vpu_mac_overflow", + "tpc_vpu_addsub_overflow", + "tpc_vpu_abs_overflow", + "tpc_vpu_fp_dst_nan_inf", + "tpc_vpu_fp_dst_denorm", + "tpc_assertions", + "tpc_illegal_instruction", + "tpc_pc_wrap_around", + "tpc_qm_sw_err", + "tpc_hbw_rresp_err", + "tpc_hbw_bresp_err", + "tpc_lbw_rresp_err", + "tpc_lbw_bresp_err" +}; + +static const char * const +gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = { + "PQ AXI HBW error", + "CQ AXI HBW error", + "CP AXI HBW error", + "CP error due to undefined OPCODE", + "CP encountered STOP OPCODE", + "CP AXI LBW error", + "CP WRREG32 or WRBULK returned error", + "N/A", + "FENCE 0 inc over max value and clipped", + "FENCE 1 inc over max value and clipped", + "FENCE 2 inc over max value and clipped", + "FENCE 3 inc over max value and clipped", + "FENCE 0 dec under min value and clipped", + "FENCE 1 dec under min value and clipped", + "FENCE 2 dec under min value and clipped", + "FENCE 3 dec under min value and clipped" +}; + +static const char * const 
+gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { + "Choice push while full error", + "Choice Q watchdog error", + "MSG AXI LBW returned with error" +}; + +static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ + QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */ + QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ + QUEUE_TYPE_INT, /* 
GAUDI_QUEUE_ID_MME_1_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ + QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */ + 
QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */ + QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */ +}; + +static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, + u64 phys_addr); +static int gaudi_send_job_on_qman0(struct hl_device *hdev, + struct hl_cs_job *job); +static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, + u32 size, u64 val); +static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, + u32 tpc_id); +static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); +static int gaudi_armcp_info_get(struct hl_device 
*hdev); +static void gaudi_disable_clock_gating(struct hl_device *hdev); +static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); + +static int gaudi_get_fixed_properties(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + int i; + + if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) { + dev_err(hdev->dev, + "Number of H/W queues must be smaller than %d\n", + HL_MAX_QUEUES); + return -EFAULT; + } + + for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { + if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { + prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 1; + } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { + prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; + prop->hw_queues_props[i].driver_only = 1; + prop->hw_queues_props[i].requires_kernel_cb = 0; + } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { + prop->hw_queues_props[i].type = QUEUE_TYPE_INT; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 0; + } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) { + prop->hw_queues_props[i].type = QUEUE_TYPE_NA; + prop->hw_queues_props[i].driver_only = 0; + prop->hw_queues_props[i].requires_kernel_cb = 0; + } + } + + for (; i < HL_MAX_QUEUES; i++) + prop->hw_queues_props[i].type = QUEUE_TYPE_NA; + + prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; + + prop->dram_base_address = DRAM_PHYS_BASE; + prop->dram_size = GAUDI_HBM_SIZE_32GB; + prop->dram_end_address = prop->dram_base_address + + prop->dram_size; + prop->dram_user_base_address = DRAM_BASE_ADDR_USER; + + prop->sram_base_address = SRAM_BASE_ADDR; + prop->sram_size = SRAM_SIZE; + prop->sram_end_address = prop->sram_base_address + + prop->sram_size; + prop->sram_user_base_address = prop->sram_base_address + + SRAM_USER_BASE_OFFSET; + + prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; + if (hdev->pldm) + prop->mmu_pgt_size = 0x800000; /* 8MB */ + else + 
prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; + prop->mmu_pte_size = HL_PTE_SIZE; + prop->mmu_hop_table_size = HOP_TABLE_SIZE; + prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; + prop->dram_page_size = PAGE_SIZE_2MB; + + prop->pmmu.hop0_shift = HOP0_SHIFT; + prop->pmmu.hop1_shift = HOP1_SHIFT; + prop->pmmu.hop2_shift = HOP2_SHIFT; + prop->pmmu.hop3_shift = HOP3_SHIFT; + prop->pmmu.hop4_shift = HOP4_SHIFT; + prop->pmmu.hop0_mask = HOP0_MASK; + prop->pmmu.hop1_mask = HOP1_MASK; + prop->pmmu.hop2_mask = HOP2_MASK; + prop->pmmu.hop3_mask = HOP3_MASK; + prop->pmmu.hop4_mask = HOP4_MASK; + prop->pmmu.start_addr = VA_HOST_SPACE_START; + prop->pmmu.end_addr = + (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; + prop->pmmu.page_size = PAGE_SIZE_4KB; + + /* PMMU and HPMMU are the same except of page size */ + memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); + prop->pmmu_huge.page_size = PAGE_SIZE_2MB; + + /* shifts and masks are the same in PMMU and DMMU */ + memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); + prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); + prop->dmmu.end_addr = VA_HOST_SPACE_END; + prop->dmmu.page_size = PAGE_SIZE_2MB; + + prop->cfg_size = CFG_SIZE; + prop->max_asid = MAX_ASID; + prop->num_of_events = GAUDI_EVENT_SIZE; + prop->tpc_enabled_mask = TPC_ENABLED_MASK; + + prop->max_power_default = MAX_POWER_DEFAULT; + + prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; + prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; + + prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; + prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; + + strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + CARD_NAME_MAX_LEN); + + return 0; +} + +static int gaudi_pci_bars_map(struct hl_device *hdev) +{ + static const char * const name[] = {"SRAM", "CFG", "HBM"}; + bool is_wc[3] = {false, false, true}; + int rc; + + rc = hl_pci_bars_map(hdev, name, is_wc); + if (rc) + return rc; + + hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + + 
(CFG_BASE - SPI_FLASH_BASE_ADDR); + + return 0; +} + +static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + u64 old_addr = addr; + int rc; + + if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) + return old_addr; + + /* Inbound Region 2 - Bar 4 - Point to HBM */ + rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr); + if (rc) + return U64_MAX; + + if (gaudi) { + old_addr = gaudi->hbm_bar_cur_addr; + gaudi->hbm_bar_cur_addr = addr; + } + + return old_addr; +} + +static int gaudi_init_iatu(struct hl_device *hdev) +{ + int rc = 0; + + /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ + rc = hl_pci_iatu_write(hdev, 0x314, + lower_32_bits(SPI_FLASH_BASE_ADDR)); + rc |= hl_pci_iatu_write(hdev, 0x318, + upper_32_bits(SPI_FLASH_BASE_ADDR)); + rc |= hl_pci_iatu_write(hdev, 0x300, 0); + /* Enable + Bar match + match enable */ + rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200); + + if (rc) + return -EIO; + + return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE, + HOST_PHYS_BASE, HOST_PHYS_SIZE); +} + +static int gaudi_early_init(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct pci_dev *pdev = hdev->pdev; + int rc; + + rc = gaudi_get_fixed_properties(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to get fixed properties\n"); + return rc; + } + + /* Check BAR sizes */ + if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n", + SRAM_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + SRAM_BAR_ID), + SRAM_BAR_SIZE); + return -ENODEV; + } + + if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) { + dev_err(hdev->dev, + "Not " HL_NAME "? 
BAR %d size %llu, expecting %llu\n", + CFG_BAR_ID, + (unsigned long long) pci_resource_len(pdev, + CFG_BAR_ID), + CFG_BAR_SIZE); + return -ENODEV; + } + + prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); + + rc = hl_pci_init(hdev); + if (rc) + return rc; + + return 0; +} + +static int gaudi_early_fini(struct hl_device *hdev) +{ + hl_pci_fini(hdev); + + return 0; +} + +/** + * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values + * + * @hdev: pointer to hl_device structure + * + */ +static void gaudi_fetch_psoc_frequency(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + + prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR); + prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF); + prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD); + prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); +} + +static int _gaudi_init_tpc_mem(struct hl_device *hdev, + dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct packet_lin_dma *init_tpc_mem_pkt; + struct hl_cs_job *job; + struct hl_cb *cb; + u64 dst_addr; + u32 cb_size, ctl; + u8 tpc_id; + int rc; + + cb = hl_cb_kernel_create(hdev, PAGE_SIZE); + if (!cb) + return -EFAULT; + + init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t) + cb->kernel_address; + cb_size = sizeof(*init_tpc_mem_pkt); + memset(init_tpc_mem_pkt, 0, cb_size); + + init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); + + ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) | + (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) | + (1 << GAUDI_PKT_CTL_RB_SHIFT) | + (1 << GAUDI_PKT_CTL_MB_SHIFT)); + + init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); + + init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); + dst_addr = (prop->sram_user_base_address & + GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> + GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; + init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); + + job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); + 
if (!job) { + dev_err(hdev->dev, "Failed to allocate a new job\n"); + rc = -ENOMEM; + goto release_cb; + } + + job->id = 0; + job->user_cb = cb; + job->user_cb->cs_cnt++; + job->user_cb_size = cb_size; + job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; + job->patched_cb = job->user_cb; + job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); + + hl_debugfs_add_job(hdev, job); + + rc = gaudi_send_job_on_qman0(hdev, job); + + if (rc) + goto free_job; + + for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { + rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id); + if (rc) + break; + } + +free_job: + hl_userptr_delete_list(hdev, &job->userptr_list); + hl_debugfs_remove_job(hdev, job); + kfree(job); + cb->cs_cnt--; + +release_cb: + hl_cb_put(cb); + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + + return rc; +} + +/* + * gaudi_init_tpc_mem() - Initialize TPC memories. + * @hdev: Pointer to hl_device structure. + * + * Copy TPC kernel fw from firmware file and run it to initialize TPC memories. + * + * Return: 0 for success, negative value for error. 
+ */ +static int gaudi_init_tpc_mem(struct hl_device *hdev) +{ + const struct firmware *fw; + size_t fw_size; + void *cpu_addr; + dma_addr_t dma_handle; + int rc; + + rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); + if (rc) { + dev_err(hdev->dev, "Firmware file %s is not found!\n", + GAUDI_TPC_FW_FILE); + goto out; + } + + fw_size = fw->size; + cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size, + &dma_handle, GFP_KERNEL | __GFP_ZERO); + if (!cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate %zu of dma memory for TPC kernel\n", + fw_size); + rc = -ENOMEM; + goto out; + } + + memcpy(cpu_addr, fw->data, fw_size); + + rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size); + + hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr, + dma_handle); + +out: + release_firmware(fw); + return rc; +} + +static int gaudi_late_init(struct hl_device *hdev) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + int rc; + + rc = gaudi->armcp_info_get(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to get armcp info\n"); + return rc; + } + + rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + if (rc) { + dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); + return rc; + } + + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); + + gaudi_fetch_psoc_frequency(hdev); + + rc = gaudi_mmu_clear_pgt_range(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); + goto disable_pci_access; + } + + rc = gaudi_init_tpc_mem(hdev); + if (rc) { + dev_err(hdev->dev, "Failed to initialize TPC memories\n"); + goto disable_pci_access; + } + + return 0; + +disable_pci_access: + hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + + return rc; +} + +static void gaudi_late_fini(struct hl_device *hdev) +{ + const struct hwmon_channel_info **channel_info_arr; + int i = 0; + + if (!hdev->hl_chip_info->info) + return; + + channel_info_arr = hdev->hl_chip_info->info; + + 
while (channel_info_arr[i]) { + kfree(channel_info_arr[i]->config); + kfree(channel_info_arr[i]); + i++; + } + + kfree(channel_info_arr); + + hdev->hl_chip_info->info = NULL; +} + +static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) +{ + dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; + void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; + int i, j, rc = 0; + + /* + * The device CPU works with 40-bits addresses, while bit 39 must be set + * to '1' when accessing the host. + * Bits 49:39 of the full host address are saved for a later + * configuration of the HW to perform extension to 50 bits. + * Because there is a single HW register that holds the extension bits, + * these bits must be identical in all allocated range. + */ + + for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { + virt_addr_arr[i] = + hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + HL_CPU_ACCESSIBLE_MEM_SIZE, + &dma_addr_arr[i], + GFP_KERNEL | __GFP_ZERO); + if (!virt_addr_arr[i]) { + rc = -ENOMEM; + goto free_dma_mem_arr; + } + + end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; + if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == + GAUDI_CPU_PCI_MSB_ADDR(end_addr)) + break; + } + + if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { + dev_err(hdev->dev, + "MSB of CPU accessible DMA memory are not identical in all range\n"); + rc = -EFAULT; + goto free_dma_mem_arr; + } + + hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; + hdev->cpu_accessible_dma_address = dma_addr_arr[i]; + hdev->cpu_pci_msb_addr = + GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); + + GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); + +free_dma_mem_arr: + for (j = 0 ; j < i ; j++) + hdev->asic_funcs->asic_dma_free_coherent(hdev, + HL_CPU_ACCESSIBLE_MEM_SIZE, + virt_addr_arr[j], + dma_addr_arr[j]); + + return r |