summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs
diff options
context:
space:
mode:
authorOded Gabbay <oded.gabbay@gmail.com>2020-05-11 10:29:27 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2020-05-19 14:48:41 +0300
commitac0ae6a96aa58eeba4aed97b12ef1dea8c5bf399 (patch)
treed5285f4d57b1e88aaea03219aa0843fcd9244597 /drivers/misc/habanalabs
parent466c7822b054ffe5bb425c8f98d08676501836e8 (diff)
habanalabs: add gaudi asic-dependent code
Add the ASIC-dependent code for GAUDI. Supply (almost) all of the function callbacks that the driver's common code needs to initialize, finalize and submit workloads to the GAUDI ASIC. It also contains the code to initialize the F/W of the GAUDI ASIC and to receive events from the F/W. Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/misc/habanalabs')
-rw-r--r--drivers/misc/habanalabs/Makefile3
-rw-r--r--drivers/misc/habanalabs/gaudi/Makefile4
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c7360
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h251
-rw-r--r--drivers/misc/habanalabs/habanalabs.h24
-rw-r--r--drivers/misc/habanalabs/habanalabs_drv.c7
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map.h687
7 files changed, 8335 insertions, 1 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 482f6227dbba..421ebd903069 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -13,3 +13,6 @@ habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
+
+include $(src)/gaudi/Makefile
+habanalabs-y += $(HL_GAUDI_FILES)
diff --git a/drivers/misc/habanalabs/gaudi/Makefile b/drivers/misc/habanalabs/gaudi/Makefile
new file mode 100644
index 000000000000..b30b523881a0
--- /dev/null
+++ b/drivers/misc/habanalabs/gaudi/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)
+
+HL_GAUDI_FILES := gaudi/gaudi.o
\ No newline at end of file
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
new file mode 100644
index 000000000000..8f3591e23a3c
--- /dev/null
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -0,0 +1,7360 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "gaudiP.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+#include "include/hw_ip/mmu/mmu_v1_1.h"
+#include "include/gaudi/gaudi_masks.h"
+#include "include/gaudi/gaudi_fw_if.h"
+#include "include/gaudi/gaudi_reg_map.h"
+#include "include/gaudi/gaudi_async_ids_map.h"
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/firmware.h>
+#include <linux/hwmon.h>
+#include <linux/genalloc.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/iommu.h>
+#include <linux/seq_file.h>
+
+/*
+ * Gaudi security scheme:
+ *
+ * 1. Host is protected by:
+ * - Range registers
+ * - MMU
+ *
+ * 2. DDR is protected by:
+ * - Range registers (protect the first 512MB)
+ *
+ * 3. Configuration is protected by:
+ * - Range registers
+ * - Protection bits
+ *
+ * MMU is always enabled.
+ *
+ * QMAN DMA channels 0,1,5 (PCI DMA):
+ * - DMA is not secured.
+ * - PQ and CQ are secured.
+ * - CP is secured: The driver needs to parse the CB, but WREG must be allowed
+ * because of TDMA (tensor DMA). Hence, WREG is never secured.
+ *
+ * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
+ * channel 0 to be secured, execute the DMA and change it back to not secured.
+ * Currently, the driver doesn't use the DMA while there are compute jobs
+ * running.
+ *
+ * The current use cases for the driver to use the DMA are:
+ * - Clear SRAM on context switch (happens on context switch when device is
+ * idle)
+ * - MMU page tables area clear (happens on init)
+ *
+ * QMAN DMA 2-4,6,7, TPC, MME, NIC:
+ * PQ is secured and is located on the Host (HBM CON TPC3 bug)
+ * CQ, CP and the engine are not secured
+ *
+ */
+
+#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
+#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
+#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
+
+#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
+
+#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
+#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
+#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
+#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
+
+#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
+#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
+#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */
+#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
+#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
+#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
+#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
+#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
+
+#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
+
+#define GAUDI_MAX_STRING_LEN 20
+
+#define GAUDI_CB_POOL_CB_CNT 512
+#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
+
+#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
+
+#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
+
+#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
+
+#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
+
+#define GAUDI_ARB_WDT_TIMEOUT 0x400000
+
+/*
+ * Name strings for the MSI entries: one per completion queue of DMA channels
+ * 0, 1 and 5 (four streams each), followed by the CPU event queue. Each name
+ * must fit in GAUDI_MAX_STRING_LEN bytes including the terminating NUL.
+ * NOTE(review): presumably used as the IRQ name when the vectors are
+ * requested - the consumer is not visible in this part of the file.
+ */
+static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
+ "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
+ "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
+ "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
+ "gaudi cpu eq"
+};
+
+/*
+ * Maps each logical DMA engine (the GAUDI_PCI_DMA_x / GAUDI_HBM_DMA_x index)
+ * to the DMA channel number assigned to it: the PCI DMA engines use channels
+ * 0, 1 and 5, the HBM DMA engines use channels 2, 3, 4, 6 and 7.
+ */
+static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
+ [GAUDI_PCI_DMA_1] = 0,
+ [GAUDI_PCI_DMA_2] = 1,
+ [GAUDI_PCI_DMA_3] = 5,
+ [GAUDI_HBM_DMA_1] = 2,
+ [GAUDI_HBM_DMA_2] = 3,
+ [GAUDI_HBM_DMA_3] = 4,
+ [GAUDI_HBM_DMA_4] = 6,
+ [GAUDI_HBM_DMA_5] = 7
+};
+
+/*
+ * Maps a completion queue index to the H/W queue ID it is assigned to. Only
+ * the four streams of DMA channels 0, 1 and 5 appear here.
+ */
+static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
+ [0] = GAUDI_QUEUE_ID_DMA_0_0,
+ [1] = GAUDI_QUEUE_ID_DMA_0_1,
+ [2] = GAUDI_QUEUE_ID_DMA_0_2,
+ [3] = GAUDI_QUEUE_ID_DMA_0_3,
+ [4] = GAUDI_QUEUE_ID_DMA_1_0,
+ [5] = GAUDI_QUEUE_ID_DMA_1_1,
+ [6] = GAUDI_QUEUE_ID_DMA_1_2,
+ [7] = GAUDI_QUEUE_ID_DMA_1_3,
+ [8] = GAUDI_QUEUE_ID_DMA_5_0,
+ [9] = GAUDI_QUEUE_ID_DMA_5_1,
+ [10] = GAUDI_QUEUE_ID_DMA_5_2,
+ [11] = GAUDI_QUEUE_ID_DMA_5_3
+};
+
+/*
+ * Size in bytes of each packet type, indexed by the packet ID (PACKET_*).
+ * IDs below MAX_PACKET_ID that have no initializer here read as 0.
+ */
+static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
+ [PACKET_WREG_32] = sizeof(struct packet_wreg32),
+ [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
+ [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
+ [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
+ [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
+ [PACKET_REPEAT] = sizeof(struct packet_repeat),
+ [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
+ [PACKET_FENCE] = sizeof(struct packet_fence),
+ [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
+ [PACKET_NOP] = sizeof(struct packet_nop),
+ [PACKET_STOP] = sizeof(struct packet_stop),
+ [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
+ [PACKET_WAIT] = sizeof(struct packet_wait),
+ [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
+};
+
+/*
+ * Flat list of the GAUDI event IDs (GAUDI_EVENT_*) known to this file,
+ * grouped by the H/W block that raises them.
+ * NOTE(review): the consumer of this list is not visible in this part of the
+ * file. The SRAM entries are deliberately not in ascending bank order (e.g.
+ * 3, 7, 6, 5, 4, 8) - presumably this follows the numeric order of the IDs in
+ * gaudi_async_ids_map.h; confirm before reordering.
+ */
+static const u32 gaudi_all_events[] = {
+ GAUDI_EVENT_PCIE_CORE_SERR,
+ GAUDI_EVENT_PCIE_CORE_DERR,
+ GAUDI_EVENT_PCIE_IF_SERR,
+ GAUDI_EVENT_PCIE_IF_DERR,
+ GAUDI_EVENT_PCIE_PHY_SERR,
+ GAUDI_EVENT_PCIE_PHY_DERR,
+ GAUDI_EVENT_TPC0_SERR,
+ GAUDI_EVENT_TPC1_SERR,
+ GAUDI_EVENT_TPC2_SERR,
+ GAUDI_EVENT_TPC3_SERR,
+ GAUDI_EVENT_TPC4_SERR,
+ GAUDI_EVENT_TPC5_SERR,
+ GAUDI_EVENT_TPC6_SERR,
+ GAUDI_EVENT_TPC7_SERR,
+ GAUDI_EVENT_TPC0_DERR,
+ GAUDI_EVENT_TPC1_DERR,
+ GAUDI_EVENT_TPC2_DERR,
+ GAUDI_EVENT_TPC3_DERR,
+ GAUDI_EVENT_TPC4_DERR,
+ GAUDI_EVENT_TPC5_DERR,
+ GAUDI_EVENT_TPC6_DERR,
+ GAUDI_EVENT_TPC7_DERR,
+ GAUDI_EVENT_MME0_ACC_SERR,
+ GAUDI_EVENT_MME0_ACC_DERR,
+ GAUDI_EVENT_MME0_SBAB_SERR,
+ GAUDI_EVENT_MME0_SBAB_DERR,
+ GAUDI_EVENT_MME1_ACC_SERR,
+ GAUDI_EVENT_MME1_ACC_DERR,
+ GAUDI_EVENT_MME1_SBAB_SERR,
+ GAUDI_EVENT_MME1_SBAB_DERR,
+ GAUDI_EVENT_MME2_ACC_SERR,
+ GAUDI_EVENT_MME2_ACC_DERR,
+ GAUDI_EVENT_MME2_SBAB_SERR,
+ GAUDI_EVENT_MME2_SBAB_DERR,
+ GAUDI_EVENT_MME3_ACC_SERR,
+ GAUDI_EVENT_MME3_ACC_DERR,
+ GAUDI_EVENT_MME3_SBAB_SERR,
+ GAUDI_EVENT_MME3_SBAB_DERR,
+ GAUDI_EVENT_DMA0_SERR_ECC,
+ GAUDI_EVENT_DMA1_SERR_ECC,
+ GAUDI_EVENT_DMA2_SERR_ECC,
+ GAUDI_EVENT_DMA3_SERR_ECC,
+ GAUDI_EVENT_DMA4_SERR_ECC,
+ GAUDI_EVENT_DMA5_SERR_ECC,
+ GAUDI_EVENT_DMA6_SERR_ECC,
+ GAUDI_EVENT_DMA7_SERR_ECC,
+ GAUDI_EVENT_DMA0_DERR_ECC,
+ GAUDI_EVENT_DMA1_DERR_ECC,
+ GAUDI_EVENT_DMA2_DERR_ECC,
+ GAUDI_EVENT_DMA3_DERR_ECC,
+ GAUDI_EVENT_DMA4_DERR_ECC,
+ GAUDI_EVENT_DMA5_DERR_ECC,
+ GAUDI_EVENT_DMA6_DERR_ECC,
+ GAUDI_EVENT_DMA7_DERR_ECC,
+ GAUDI_EVENT_CPU_IF_ECC_SERR,
+ GAUDI_EVENT_CPU_IF_ECC_DERR,
+ GAUDI_EVENT_PSOC_MEM_SERR,
+ GAUDI_EVENT_PSOC_CORESIGHT_SERR,
+ GAUDI_EVENT_PSOC_MEM_DERR,
+ GAUDI_EVENT_PSOC_CORESIGHT_DERR,
+ GAUDI_EVENT_SRAM0_SERR,
+ GAUDI_EVENT_SRAM1_SERR,
+ GAUDI_EVENT_SRAM2_SERR,
+ GAUDI_EVENT_SRAM3_SERR,
+ GAUDI_EVENT_SRAM7_SERR,
+ GAUDI_EVENT_SRAM6_SERR,
+ GAUDI_EVENT_SRAM5_SERR,
+ GAUDI_EVENT_SRAM4_SERR,
+ GAUDI_EVENT_SRAM8_SERR,
+ GAUDI_EVENT_SRAM9_SERR,
+ GAUDI_EVENT_SRAM10_SERR,
+ GAUDI_EVENT_SRAM11_SERR,
+ GAUDI_EVENT_SRAM15_SERR,
+ GAUDI_EVENT_SRAM14_SERR,
+ GAUDI_EVENT_SRAM13_SERR,
+ GAUDI_EVENT_SRAM12_SERR,
+ GAUDI_EVENT_SRAM16_SERR,
+ GAUDI_EVENT_SRAM17_SERR,
+ GAUDI_EVENT_SRAM18_SERR,
+ GAUDI_EVENT_SRAM19_SERR,
+ GAUDI_EVENT_SRAM23_SERR,
+ GAUDI_EVENT_SRAM22_SERR,
+ GAUDI_EVENT_SRAM21_SERR,
+ GAUDI_EVENT_SRAM20_SERR,
+ GAUDI_EVENT_SRAM24_SERR,
+ GAUDI_EVENT_SRAM25_SERR,
+ GAUDI_EVENT_SRAM26_SERR,
+ GAUDI_EVENT_SRAM27_SERR,
+ GAUDI_EVENT_SRAM31_SERR,
+ GAUDI_EVENT_SRAM30_SERR,
+ GAUDI_EVENT_SRAM29_SERR,
+ GAUDI_EVENT_SRAM28_SERR,
+ GAUDI_EVENT_SRAM0_DERR,
+ GAUDI_EVENT_SRAM1_DERR,
+ GAUDI_EVENT_SRAM2_DERR,
+ GAUDI_EVENT_SRAM3_DERR,
+ GAUDI_EVENT_SRAM7_DERR,
+ GAUDI_EVENT_SRAM6_DERR,
+ GAUDI_EVENT_SRAM5_DERR,
+ GAUDI_EVENT_SRAM4_DERR,
+ GAUDI_EVENT_SRAM8_DERR,
+ GAUDI_EVENT_SRAM9_DERR,
+ GAUDI_EVENT_SRAM10_DERR,
+ GAUDI_EVENT_SRAM11_DERR,
+ GAUDI_EVENT_SRAM15_DERR,
+ GAUDI_EVENT_SRAM14_DERR,
+ GAUDI_EVENT_SRAM13_DERR,
+ GAUDI_EVENT_SRAM12_DERR,
+ GAUDI_EVENT_SRAM16_DERR,
+ GAUDI_EVENT_SRAM17_DERR,
+ GAUDI_EVENT_SRAM18_DERR,
+ GAUDI_EVENT_SRAM19_DERR,
+ GAUDI_EVENT_SRAM23_DERR,
+ GAUDI_EVENT_SRAM22_DERR,
+ GAUDI_EVENT_SRAM21_DERR,
+ GAUDI_EVENT_SRAM20_DERR,
+ GAUDI_EVENT_SRAM24_DERR,
+ GAUDI_EVENT_SRAM25_DERR,
+ GAUDI_EVENT_SRAM26_DERR,
+ GAUDI_EVENT_SRAM27_DERR,
+ GAUDI_EVENT_SRAM31_DERR,
+ GAUDI_EVENT_SRAM30_DERR,
+ GAUDI_EVENT_SRAM29_DERR,
+ GAUDI_EVENT_SRAM28_DERR,
+ GAUDI_EVENT_NIC0_SERR,
+ GAUDI_EVENT_NIC1_SERR,
+ GAUDI_EVENT_NIC2_SERR,
+ GAUDI_EVENT_NIC3_SERR,
+ GAUDI_EVENT_NIC4_SERR,
+ GAUDI_EVENT_NIC0_DERR,
+ GAUDI_EVENT_NIC1_DERR,
+ GAUDI_EVENT_NIC2_DERR,
+ GAUDI_EVENT_NIC3_DERR,
+ GAUDI_EVENT_NIC4_DERR,
+ GAUDI_EVENT_DMA_IF0_SERR,
+ GAUDI_EVENT_DMA_IF1_SERR,
+ GAUDI_EVENT_DMA_IF2_SERR,
+ GAUDI_EVENT_DMA_IF3_SERR,
+ GAUDI_EVENT_DMA_IF0_DERR,
+ GAUDI_EVENT_DMA_IF1_DERR,
+ GAUDI_EVENT_DMA_IF2_DERR,
+ GAUDI_EVENT_DMA_IF3_DERR,
+ GAUDI_EVENT_GIC500,
+ GAUDI_EVENT_HBM_0_SERR,
+ GAUDI_EVENT_HBM_1_SERR,
+ GAUDI_EVENT_HBM_2_SERR,
+ GAUDI_EVENT_HBM_3_SERR,
+ GAUDI_EVENT_HBM_0_DERR,
+ GAUDI_EVENT_HBM_1_DERR,
+ GAUDI_EVENT_HBM_2_DERR,
+ GAUDI_EVENT_HBM_3_DERR,
+ GAUDI_EVENT_MMU_SERR,
+ GAUDI_EVENT_MMU_DERR,
+ GAUDI_EVENT_PCIE_DEC,
+ GAUDI_EVENT_TPC0_DEC,
+ GAUDI_EVENT_TPC1_DEC,
+ GAUDI_EVENT_TPC2_DEC,
+ GAUDI_EVENT_TPC3_DEC,
+ GAUDI_EVENT_TPC4_DEC,
+ GAUDI_EVENT_TPC5_DEC,
+ GAUDI_EVENT_TPC6_DEC,
+ GAUDI_EVENT_TPC7_DEC,
+ GAUDI_EVENT_AXI_ECC,
+ GAUDI_EVENT_L2_RAM_ECC,
+ GAUDI_EVENT_MME0_WBC_RSP,
+ GAUDI_EVENT_MME0_SBAB0_RSP,
+ GAUDI_EVENT_MME1_WBC_RSP,
+ GAUDI_EVENT_MME1_SBAB0_RSP,
+ GAUDI_EVENT_MME2_WBC_RSP,
+ GAUDI_EVENT_MME2_SBAB0_RSP,
+ GAUDI_EVENT_MME3_WBC_RSP,
+ GAUDI_EVENT_MME3_SBAB0_RSP,
+ GAUDI_EVENT_PLL0,
+ GAUDI_EVENT_PLL1,
+ GAUDI_EVENT_PLL2,
+ GAUDI_EVENT_PLL3,
+ GAUDI_EVENT_PLL4,
+ GAUDI_EVENT_PLL5,
+ GAUDI_EVENT_PLL6,
+ GAUDI_EVENT_PLL7,
+ GAUDI_EVENT_PLL8,
+ GAUDI_EVENT_PLL9,
+ GAUDI_EVENT_PLL10,
+ GAUDI_EVENT_PLL11,
+ GAUDI_EVENT_PLL12,
+ GAUDI_EVENT_PLL13,
+ GAUDI_EVENT_PLL14,
+ GAUDI_EVENT_PLL15,
+ GAUDI_EVENT_PLL16,
+ GAUDI_EVENT_PLL17,
+ GAUDI_EVENT_CPU_AXI_SPLITTER,
+ GAUDI_EVENT_PSOC_AXI_DEC,
+ GAUDI_EVENT_PSOC_PRSTN_FALL,
+ GAUDI_EVENT_TPC0_BMON_SPMU,
+ GAUDI_EVENT_TPC0_KRN_ERR,
+ GAUDI_EVENT_TPC1_BMON_SPMU,
+ GAUDI_EVENT_TPC1_KRN_ERR,
+ GAUDI_EVENT_TPC2_BMON_SPMU,
+ GAUDI_EVENT_TPC2_KRN_ERR,
+ GAUDI_EVENT_TPC3_BMON_SPMU,
+ GAUDI_EVENT_TPC3_KRN_ERR,
+ GAUDI_EVENT_TPC4_BMON_SPMU,
+ GAUDI_EVENT_TPC4_KRN_ERR,
+ GAUDI_EVENT_TPC5_BMON_SPMU,
+ GAUDI_EVENT_TPC5_KRN_ERR,
+ GAUDI_EVENT_TPC6_BMON_SPMU,
+ GAUDI_EVENT_TPC6_KRN_ERR,
+ GAUDI_EVENT_TPC7_BMON_SPMU,
+ GAUDI_EVENT_TPC7_KRN_ERR,
+ GAUDI_EVENT_MMU_PAGE_FAULT,
+ GAUDI_EVENT_MMU_WR_PERM,
+ GAUDI_EVENT_DMA_BM_CH0,
+ GAUDI_EVENT_DMA_BM_CH1,
+ GAUDI_EVENT_DMA_BM_CH2,
+ GAUDI_EVENT_DMA_BM_CH3,
+ GAUDI_EVENT_DMA_BM_CH4,
+ GAUDI_EVENT_DMA_BM_CH5,
+ GAUDI_EVENT_DMA_BM_CH6,
+ GAUDI_EVENT_DMA_BM_CH7,
+ GAUDI_EVENT_HBM0_SPI_0,
+ GAUDI_EVENT_HBM0_SPI_1,
+ GAUDI_EVENT_HBM1_SPI_0,
+ GAUDI_EVENT_HBM1_SPI_1,
+ GAUDI_EVENT_HBM2_SPI_0,
+ GAUDI_EVENT_HBM2_SPI_1,
+ GAUDI_EVENT_HBM3_SPI_0,
+ GAUDI_EVENT_HBM3_SPI_1,
+ GAUDI_EVENT_RAZWI_OR_ADC,
+ GAUDI_EVENT_TPC0_QM,
+ GAUDI_EVENT_TPC1_QM,
+ GAUDI_EVENT_TPC2_QM,
+ GAUDI_EVENT_TPC3_QM,
+ GAUDI_EVENT_TPC4_QM,
+ GAUDI_EVENT_TPC5_QM,
+ GAUDI_EVENT_TPC6_QM,
+ GAUDI_EVENT_TPC7_QM,
+ GAUDI_EVENT_MME0_QM,
+ GAUDI_EVENT_MME2_QM,
+ GAUDI_EVENT_DMA0_QM,
+ GAUDI_EVENT_DMA1_QM,
+ GAUDI_EVENT_DMA2_QM,
+ GAUDI_EVENT_DMA3_QM,
+ GAUDI_EVENT_DMA4_QM,
+ GAUDI_EVENT_DMA5_QM,
+ GAUDI_EVENT_DMA6_QM,
+ GAUDI_EVENT_DMA7_QM,
+ GAUDI_EVENT_NIC0_QM0,
+ GAUDI_EVENT_NIC0_QM1,
+ GAUDI_EVENT_NIC1_QM0,
+ GAUDI_EVENT_NIC1_QM1,
+ GAUDI_EVENT_NIC2_QM0,
+ GAUDI_EVENT_NIC2_QM1,
+ GAUDI_EVENT_NIC3_QM0,
+ GAUDI_EVENT_NIC3_QM1,
+ GAUDI_EVENT_NIC4_QM0,
+ GAUDI_EVENT_NIC4_QM1,
+ GAUDI_EVENT_DMA0_CORE,
+ GAUDI_EVENT_DMA1_CORE,
+ GAUDI_EVENT_DMA2_CORE,
+ GAUDI_EVENT_DMA3_CORE,
+ GAUDI_EVENT_DMA4_CORE,
+ GAUDI_EVENT_DMA5_CORE,
+ GAUDI_EVENT_DMA6_CORE,
+ GAUDI_EVENT_DMA7_CORE,
+ GAUDI_EVENT_FIX_POWER_ENV_S,
+ GAUDI_EVENT_FIX_POWER_ENV_E,
+ GAUDI_EVENT_FIX_THERMAL_ENV_S,
+ GAUDI_EVENT_FIX_THERMAL_ENV_E,
+ GAUDI_EVENT_RAZWI_OR_ADC_SW
+};
+
+/*
+ * Human-readable names of the GAUDI_NUM_OF_TPC_INTR_CAUSE TPC interrupt
+ * causes.
+ * NOTE(review): presumably indexed by the bit position in the TPC interrupt
+ * cause register - the consumer is not visible in this part of the file.
+ */
+static const char * const
+gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
+ "tpc_address_exceed_slm",
+ "tpc_div_by_0",
+ "tpc_spu_mac_overflow",
+ "tpc_spu_addsub_overflow",
+ "tpc_spu_abs_overflow",
+ "tpc_spu_fp_dst_nan_inf",
+ "tpc_spu_fp_dst_denorm",
+ "tpc_vpu_mac_overflow",
+ "tpc_vpu_addsub_overflow",
+ "tpc_vpu_abs_overflow",
+ "tpc_vpu_fp_dst_nan_inf",
+ "tpc_vpu_fp_dst_denorm",
+ "tpc_assertions",
+ "tpc_illegal_instruction",
+ "tpc_pc_wrap_around",
+ "tpc_qm_sw_err",
+ "tpc_hbw_rresp_err",
+ "tpc_hbw_bresp_err",
+ "tpc_lbw_rresp_err",
+ "tpc_lbw_bresp_err"
+};
+
+/*
+ * Human-readable descriptions of the GAUDI_NUM_OF_QM_ERR_CAUSE QMAN error
+ * causes. Entry 7 is a placeholder ("N/A").
+ * NOTE(review): presumably indexed by the bit position in the QMAN error
+ * cause register - the consumer is not visible in this part of the file.
+ */
+static const char * const
+gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
+ "PQ AXI HBW error",
+ "CQ AXI HBW error",
+ "CP AXI HBW error",
+ "CP error due to undefined OPCODE",
+ "CP encountered STOP OPCODE",
+ "CP AXI LBW error",
+ "CP WRREG32 or WRBULK returned error",
+ "N/A",
+ "FENCE 0 inc over max value and clipped",
+ "FENCE 1 inc over max value and clipped",
+ "FENCE 2 inc over max value and clipped",
+ "FENCE 3 inc over max value and clipped",
+ "FENCE 0 dec under min value and clipped",
+ "FENCE 1 dec under min value and clipped",
+ "FENCE 2 dec under min value and clipped",
+ "FENCE 3 dec under min value and clipped"
+};
+
+/*
+ * Human-readable descriptions of the GAUDI_NUM_OF_QM_ARB_ERR_CAUSE QMAN
+ * arbiter error causes.
+ * NOTE(review): presumably indexed by the bit position in the arbiter error
+ * cause register - the consumer is not visible in this part of the file.
+ */
+static const char * const
+gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
+ "Choice push while full error",
+ "Choice Q watchdog error",
+ "MSG AXI LBW returned with error"
+};
+
+/*
+ * Type of every H/W queue, indexed by the queue ID (the ID each entry belongs
+ * to is given in the comment next to it, so the order here must follow the
+ * queue ID enumeration):
+ * - EXT: the streams of DMA channels 0, 1 and 5
+ * - CPU: the CPU PQ
+ * - INT: the streams of DMA channels 2-4, 6, 7, of the MMEs and of the TPCs
+ * - NA: the NIC queues
+ *
+ * gaudi_get_fixed_properties() reads this table to fill hw_queues_props.
+ */
+static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
+ QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
+ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
+ QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
+ QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
+};
+
+static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
+ u64 phys_addr);
+static int gaudi_send_job_on_qman0(struct hl_device *hdev,
+ struct hl_cs_job *job);
+static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
+ u32 size, u64 val);
+static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
+ u32 tpc_id);
+static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
+static int gaudi_armcp_info_get(struct hl_device *hdev);
+static void gaudi_disable_clock_gating(struct hl_device *hdev);
+static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
+
+/**
+ * gaudi_get_fixed_properties - Fill the fixed ASIC properties of GAUDI
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Populates hdev->asic_prop with the values that do not depend on run-time
+ * discovery: the per-queue properties (derived from gaudi_queue_type), the
+ * DRAM/SRAM address ranges, the MMU layout for the host (PMMU, huge-page PMMU)
+ * and device (DMMU) address spaces, and miscellaneous limits and defaults.
+ *
+ * Return: 0 on success, -EFAULT if the number of GAUDI H/W queues does not
+ * fit in the common code's queue array.
+ */
+static int gaudi_get_fixed_properties(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int i;
+
+	if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
+		dev_err(hdev->dev,
+			"Number of H/W queues must be smaller than %d\n",
+			HL_MAX_QUEUES);
+		return -EFAULT;
+	}
+
+	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
+		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
+			prop->hw_queues_props[i].driver_only = 0;
+			prop->hw_queues_props[i].requires_kernel_cb = 1;
+		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
+			prop->hw_queues_props[i].driver_only = 1;
+			prop->hw_queues_props[i].requires_kernel_cb = 0;
+		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
+			prop->hw_queues_props[i].driver_only = 0;
+			prop->hw_queues_props[i].requires_kernel_cb = 0;
+		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
+			prop->hw_queues_props[i].driver_only = 0;
+			prop->hw_queues_props[i].requires_kernel_cb = 0;
+		}
+	}
+
+	/* The slots beyond the GAUDI queues are not backed by any H/W queue */
+	for (; i < HL_MAX_QUEUES; i++)
+		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
+
+	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
+
+	prop->dram_base_address = DRAM_PHYS_BASE;
+	prop->dram_size = GAUDI_HBM_SIZE_32GB;
+	prop->dram_end_address = prop->dram_base_address +
+					prop->dram_size;
+	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
+
+	prop->sram_base_address = SRAM_BASE_ADDR;
+	prop->sram_size = SRAM_SIZE;
+	prop->sram_end_address = prop->sram_base_address +
+					prop->sram_size;
+	prop->sram_user_base_address = prop->sram_base_address +
+					SRAM_USER_BASE_OFFSET;
+
+	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
+	if (hdev->pldm)
+		prop->mmu_pgt_size = 0x800000; /* 8MB */
+	else
+		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
+	prop->mmu_pte_size = HL_PTE_SIZE;
+	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
+	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
+	prop->dram_page_size = PAGE_SIZE_2MB;
+
+	prop->pmmu.hop0_shift = HOP0_SHIFT;
+	prop->pmmu.hop1_shift = HOP1_SHIFT;
+	prop->pmmu.hop2_shift = HOP2_SHIFT;
+	prop->pmmu.hop3_shift = HOP3_SHIFT;
+	prop->pmmu.hop4_shift = HOP4_SHIFT;
+	prop->pmmu.hop0_mask = HOP0_MASK;
+	prop->pmmu.hop1_mask = HOP1_MASK;
+	prop->pmmu.hop2_mask = HOP2_MASK;
+	prop->pmmu.hop3_mask = HOP3_MASK;
+	prop->pmmu.hop4_mask = HOP4_MASK;
+	/* The PMMU covers the lower half of the host VA space */
+	prop->pmmu.start_addr = VA_HOST_SPACE_START;
+	prop->pmmu.end_addr =
+			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
+	prop->pmmu.page_size = PAGE_SIZE_4KB;
+
+	/* PMMU and HPMMU are the same except of page size */
+	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
+	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
+
+	/* shifts and masks are the same in PMMU and DMMU */
+	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
+	/* The DMMU covers the upper half of the host VA space */
+	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
+	prop->dmmu.end_addr = VA_HOST_SPACE_END;
+	prop->dmmu.page_size = PAGE_SIZE_2MB;
+
+	prop->cfg_size = CFG_SIZE;
+	prop->max_asid = MAX_ASID;
+	prop->num_of_events = GAUDI_EVENT_SIZE;
+	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
+
+	prop->max_power_default = MAX_POWER_DEFAULT;
+
+	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
+	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
+
+	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
+	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
+
+	/*
+	 * strncpy() leaves the destination unterminated when the source fills
+	 * the buffer. strscpy_pad() always NUL-terminates and still zero-pads
+	 * the remainder, as strncpy() did.
+	 */
+	strscpy_pad(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+			CARD_NAME_MAX_LEN);
+
+	return 0;
+}
+
+/**
+ * gaudi_pci_bars_map - Map the PCI BARs and locate the register space
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Maps the SRAM, CFG and HBM BARs (only the HBM BAR is flagged as
+ * write-combined) and, on success, points hdev->rmmio at the configuration
+ * space inside the CFG BAR.
+ *
+ * Return: 0 on success, otherwise the error returned by hl_pci_bars_map().
+ */
+static int gaudi_pci_bars_map(struct hl_device *hdev)
+{
+	static const char * const bar_names[] = {"SRAM", "CFG", "HBM"};
+	bool write_combine[3] = {false, false, true};
+	int rc = hl_pci_bars_map(hdev, bar_names, write_combine);
+
+	/* The CFG BAR starts at the SPI flash, the registers come after it */
+	if (!rc)
+		hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
+				(CFG_BASE - SPI_FLASH_BASE_ADDR);
+
+	return rc;
+}
+
+/**
+ * gaudi_set_hbm_bar_base - Point the HBM BAR at a new device address
+ *
+ * @hdev: pointer to hl_device structure
+ * @addr: device address the BAR should start at
+ *
+ * Nothing is written to the H/W when the ASIC-specific structure exists and
+ * already records @addr as the current BAR base.
+ *
+ * Return: the address the BAR pointed to before the call when it is tracked
+ * in the ASIC-specific structure, @addr itself when that structure does not
+ * exist yet or nothing had to change, U64_MAX if programming the BAR failed.
+ */
+static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	u64 prev_addr;
+
+	if (gaudi && gaudi->hbm_bar_cur_addr == addr)
+		return addr;
+
+	/* Inbound Region 2 - Bar 4 - Point to HBM */
+	if (hl_pci_set_dram_bar_base(hdev, 2, 4, addr))
+		return U64_MAX;
+
+	/* No bookkeeping is possible before asic_specific is allocated */
+	if (!gaudi)
+		return addr;
+
+	prev_addr = gaudi->hbm_bar_cur_addr;
+	gaudi->hbm_bar_cur_addr = addr;
+
+	return prev_addr;
+}
+
+/**
+ * gaudi_init_iatu - Program the PCI address translation regions
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Configures inbound region 1 (BAR 2) to point to the SPI flash and then
+ * hands over to the common hl_pci_init_iatu() for the remaining regions.
+ *
+ * Return: 0 on success, -EIO if any of the iATU register writes failed,
+ * otherwise the value returned by hl_pci_init_iatu().
+ */
+static int gaudi_init_iatu(struct hl_device *hdev)
+{
+	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
+	const u32 spi_flash_region[][2] = {
+		{0x314, lower_32_bits(SPI_FLASH_BASE_ADDR)},
+		{0x318, upper_32_bits(SPI_FLASH_BASE_ADDR)},
+		{0x300, 0},
+		/* Enable + Bar match + match enable */
+		{0x304, 0xC0080200},
+	};
+	int i, rc = 0;
+
+	/* Issue every write even if an earlier one failed */
+	for (i = 0 ; i < ARRAY_SIZE(spi_flash_region) ; i++)
+		rc |= hl_pci_iatu_write(hdev, spi_flash_region[i][0],
+					spi_flash_region[i][1]);
+
+	if (rc)
+		return -EIO;
+
+	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
+				HOST_PHYS_BASE, HOST_PHYS_SIZE);
+}
+
+/**
+ * gaudi_early_init - Early initialization of a GAUDI device
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Fills the fixed ASIC properties, verifies that the SRAM and CFG BARs have
+ * the sizes expected from a GAUDI device, records the size of the HBM BAR and
+ * initializes the PCI layer.
+ *
+ * Return: 0 on success, -ENODEV if a BAR size does not match, otherwise the
+ * error returned by gaudi_get_fixed_properties() or hl_pci_init().
+ */
+static int gaudi_early_init(struct hl_device *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	resource_size_t bar_len;
+	int rc;
+
+	rc = gaudi_get_fixed_properties(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get fixed properties\n");
+		return rc;
+	}
+
+	/* Check BAR sizes */
+	bar_len = pci_resource_len(pdev, SRAM_BAR_ID);
+	if (bar_len != SRAM_BAR_SIZE) {
+		dev_err(hdev->dev,
+			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
+			SRAM_BAR_ID, (unsigned long long) bar_len,
+			SRAM_BAR_SIZE);
+		return -ENODEV;
+	}
+
+	bar_len = pci_resource_len(pdev, CFG_BAR_ID);
+	if (bar_len != CFG_BAR_SIZE) {
+		dev_err(hdev->dev,
+			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
+			CFG_BAR_ID, (unsigned long long) bar_len,
+			CFG_BAR_SIZE);
+		return -ENODEV;
+	}
+
+	/* The HBM BAR size is not checked, only recorded */
+	hdev->asic_prop.dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
+
+	return hl_pci_init(hdev);
+}
+
+/**
+ * gaudi_early_fini - Undo gaudi_early_init()
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Tears down the PCI layer through hl_pci_fini().
+ *
+ * Return: always 0.
+ */
+static int gaudi_early_fini(struct hl_device *hdev)
+{
+ hl_pci_fini(hdev);
+
+ return 0;
+}
+
+/**
+ * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Reads the NR, NF, OD and DIV_FACTOR_1 registers of the PSOC PCI PLL and
+ * stores the raw values in the ASIC fixed properties. No frequency is
+ * computed here.
+ */
+static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
+ prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
+ prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
+ prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+}
+
+/**
+ * _gaudi_init_tpc_mem - Copy the TPC kernel to SRAM and run it on every TPC
+ *
+ * @hdev: pointer to hl_device structure
+ * @tpc_kernel_src_addr: DMA address of the buffer holding the TPC kernel
+ * @tpc_kernel_size: size of the TPC kernel in bytes
+ *
+ * Builds a kernel command buffer holding a single LIN_DMA packet that copies
+ * the kernel to the user base address of the SRAM, submits it on QMAN0 and
+ * then runs the kernel on each TPC engine in turn, stopping at the first
+ * failure.
+ *
+ * Return: 0 on success, -EFAULT if the command buffer could not be created,
+ * -ENOMEM if the job could not be allocated, otherwise the error returned by
+ * gaudi_send_job_on_qman0() or gaudi_run_tpc_kernel().
+ */
+static int _gaudi_init_tpc_mem(struct hl_device *hdev,
+ dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct packet_lin_dma *init_tpc_mem_pkt;
+ struct hl_cs_job *job;
+ struct hl_cb *cb;
+ u64 dst_addr;
+ u32 cb_size, ctl;
+ u8 tpc_id;
+ int rc;
+
+ cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ if (!cb)
+ return -EFAULT;
+
+ /* The CB holds exactly one LIN_DMA packet, start from a zeroed one */
+ init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
+ cb->kernel_address;
+ cb_size = sizeof(*init_tpc_mem_pkt);
+ memset(init_tpc_mem_pkt, 0, cb_size);
+
+ init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
+
+ ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
+ (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
+ (1 << GAUDI_PKT_CTL_RB_SHIFT) |
+ (1 << GAUDI_PKT_CTL_MB_SHIFT));
+
+ init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
+
+ init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
+ /* Destination is the user base address of the SRAM */
+ dst_addr = (prop->sram_user_base_address &
+ GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
+ GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
+ init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
+
+ job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+ if (!job) {
+ dev_err(hdev->dev, "Failed to allocate a new job\n");
+ rc = -ENOMEM;
+ goto release_cb;
+ }
+
+ job->id = 0;
+ job->user_cb = cb;
+ job->user_cb->cs_cnt++;
+ job->user_cb_size = cb_size;
+ job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
+ job->patched_cb = job->user_cb;
+ /*
+ * NOTE(review): the extra packet_msg_prot is presumably room for a
+ * packet that gaudi_send_job_on_qman0() appends - confirm there.
+ */
+ job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
+
+ hl_debugfs_add_job(hdev, job);
+
+ rc = gaudi_send_job_on_qman0(hdev, job);
+
+ if (rc)
+ goto free_job;
+
+ /* dst_addr is where the kernel now resides, run it on every TPC */
+ for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
+ rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
+ if (rc)
+ break;
+ }
+
+free_job:
+ hl_userptr_delete_list(hdev, &job->userptr_list);
+ hl_debugfs_remove_job(hdev, job);
+ kfree(job);
+ /* Undo the cs_cnt increment done when the job took the CB */
+ cb->cs_cnt--;
+
+release_cb:
+ hl_cb_put(cb);
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+ return rc;
+}
+
+/**
+ * gaudi_init_tpc_mem() - Initialize TPC memories.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
+ * The firmware image is staged in a DMA-coherent buffer that is freed again
+ * before returning.
+ *
+ * Return: 0 for success, negative value for error (the request_firmware()
+ * error if the file cannot be loaded, -ENOMEM if the staging buffer cannot be
+ * allocated, otherwise the error from _gaudi_init_tpc_mem()).
+ */
+static int gaudi_init_tpc_mem(struct hl_device *hdev)
+{
+	const struct firmware *fw;
+	size_t fw_size;
+	void *cpu_addr;
+	dma_addr_t dma_handle;
+	int rc;
+
+	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
+	if (rc) {
+		dev_err(hdev->dev, "Firmware file %s is not found!\n",
+			GAUDI_TPC_FW_FILE);
+		/*
+		 * Nothing was acquired, so there is nothing to release. Do
+		 * not hand a pointer this function never initialized to
+		 * release_firmware().
+		 */
+		return rc;
+	}
+
+	fw_size = fw->size;
+	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
+			&dma_handle, GFP_KERNEL | __GFP_ZERO);
+	if (!cpu_addr) {
+		dev_err(hdev->dev,
+			"Failed to allocate %zu of dma memory for TPC kernel\n",
+			fw_size);
+		rc = -ENOMEM;
+		goto release_fw;
+	}
+
+	memcpy(cpu_addr, fw->data, fw_size);
+
+	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
+
+	/* Free with the same size that was used for the allocation */
+	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw_size, cpu_addr,
+			dma_handle);
+
+release_fw:
+	release_firmware(fw);
+	return rc;
+}
+
+/**
+ * gaudi_late_init - Late initialization of a GAUDI device
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Fetches the ArmCP info, enables PCI access from the device CPU, writes
+ * GAUDI_EVENT_INTS_REGISTER to the GIC SETSPI register, reads the PSOC PLL
+ * values, clears the MMU page tables range and initializes the TPC memories.
+ * If one of the last two steps fails, PCI access from the CPU is disabled
+ * again.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed.
+ */
+static int gaudi_late_init(struct hl_device *hdev)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	int rc = gaudi->armcp_info_get(hdev);
+
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get armcp info\n");
+		return rc;
+	}
+
+	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+		return rc;
+	}
+
+	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
+
+	gaudi_fetch_psoc_frequency(hdev);
+
+	rc = gaudi_mmu_clear_pgt_range(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
+	} else {
+		rc = gaudi_init_tpc_mem(hdev);
+		if (rc)
+			dev_err(hdev->dev,
+				"Failed to initialize TPC memories\n");
+	}
+
+	/* From here on, a failure must revoke the PCI access enabled above */
+	if (rc)
+		hl_fw_send_pci_access_msg(hdev,
+					ARMCP_PACKET_DISABLE_PCI_ACCESS);
+
+	return rc;
+}
+
+/**
+ * gaudi_late_fini - Free the hwmon channel information
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Walks the NULL-terminated array in hdev->hl_chip_info->info, frees every
+ * channel's config and the channel itself, then frees the array and clears
+ * the pointer. Does nothing if the array was never set.
+ */
+static void gaudi_late_fini(struct hl_device *hdev)
+{
+	const struct hwmon_channel_info **channels = hdev->hl_chip_info->info;
+	const struct hwmon_channel_info **cur;
+
+	if (!channels)
+		return;
+
+	for (cur = channels ; *cur ; cur++) {
+		kfree((*cur)->config);
+		kfree(*cur);
+	}
+
+	kfree(channels);
+
+	hdev->hl_chip_info->info = NULL;
+}
+
+static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
+{
+ dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
+ void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
+ int i, j, rc = 0;
+
+ /*
+ * The device CPU works with 40-bits addresses, while bit 39 must be set
+ * to '1' when accessing the host.
+ * Bits 49:39 of the full host address are saved for a later