summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/misc/habanalabs/Makefile3
-rw-r--r--drivers/misc/habanalabs/gaudi/Makefile4
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c7360
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h251
-rw-r--r--drivers/misc/habanalabs/habanalabs.h24
-rw-r--r--drivers/misc/habanalabs/habanalabs_drv.c7
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map.h687
7 files changed, 8335 insertions, 1 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 482f6227dbba..421ebd903069 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -13,3 +13,6 @@ habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
+
+include $(src)/gaudi/Makefile
+habanalabs-y += $(HL_GAUDI_FILES)
diff --git a/drivers/misc/habanalabs/gaudi/Makefile b/drivers/misc/habanalabs/gaudi/Makefile
new file mode 100644
index 000000000000..b30b523881a0
--- /dev/null
+++ b/drivers/misc/habanalabs/gaudi/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)
+
+HL_GAUDI_FILES := gaudi/gaudi.o \ No newline at end of file
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
new file mode 100644
index 000000000000..8f3591e23a3c
--- /dev/null
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -0,0 +1,7360 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "gaudiP.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+#include "include/hw_ip/mmu/mmu_v1_1.h"
+#include "include/gaudi/gaudi_masks.h"
+#include "include/gaudi/gaudi_fw_if.h"
+#include "include/gaudi/gaudi_reg_map.h"
+#include "include/gaudi/gaudi_async_ids_map.h"
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/firmware.h>
+#include <linux/hwmon.h>
+#include <linux/genalloc.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/iommu.h>
+#include <linux/seq_file.h>
+
+/*
+ * Gaudi security scheme:
+ *
+ * 1. Host is protected by:
+ * - Range registers
+ * - MMU
+ *
+ * 2. DDR is protected by:
+ * - Range registers (protect the first 512MB)
+ *
+ * 3. Configuration is protected by:
+ * - Range registers
+ * - Protection bits
+ *
+ * MMU is always enabled.
+ *
+ * QMAN DMA channels 0,1,5 (PCI DMAN):
+ * - DMA is not secured.
+ * - PQ and CQ are secured.
+ * - CP is secured: The driver needs to parse CB but WREG should be allowed
+ * because of TDMA (tensor DMA). Hence, WREG is always not
+ * secured.
+ *
+ * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
+ * channel 0 to be secured, execute the DMA and change it back to not secured.
+ * Currently, the driver doesn't use the DMA while there are compute jobs
+ * running.
+ *
+ * The current use cases for the driver to use the DMA are:
+ * - Clear SRAM on context switch (happens on context switch when device is
+ * idle)
+ * - MMU page tables area clear (happens on init)
+ *
+ * QMAN DMA 2-4,6,7, TPC, MME, NIC:
+ * PQ is secured and is located on the Host (HBM CON TPC3 bug)
+ * CQ, CP and the engine are not secured
+ *
+ */
+
+#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
+#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
+#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
+
+#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
+
+#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
+#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
+#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
+#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
+
+#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
+#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
+#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */
+#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
+#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
+#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
+#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
+#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
+
+#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
+
+#define GAUDI_MAX_STRING_LEN 20
+
+#define GAUDI_CB_POOL_CB_CNT 512
+#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
+
+#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
+
+#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
+
+#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
+
+#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
+
+#define GAUDI_ARB_WDT_TIMEOUT 0x400000
+
+/* Human-readable names registered for the MSI vectors: one per completion
+ * queue (of the PCI DMA engines 0, 1 and 5), plus the CPU event queue.
+ */
+static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
+	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
+	"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
+	"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
+	"gaudi cpu eq"
+};
+
+/* Map each logical DMA role to its physical DMA channel number.
+ * Channels 0, 1 and 5 serve host (PCI) DMA; the rest serve HBM DMA
+ * (see the "QMAN DMA channels" note in the file header).
+ */
+static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
+	[GAUDI_PCI_DMA_1] = 0,
+	[GAUDI_PCI_DMA_2] = 1,
+	[GAUDI_PCI_DMA_3] = 5,
+	[GAUDI_HBM_DMA_1] = 2,
+	[GAUDI_HBM_DMA_2] = 3,
+	[GAUDI_HBM_DMA_3] = 4,
+	[GAUDI_HBM_DMA_4] = 6,
+	[GAUDI_HBM_DMA_5] = 7
+};
+
+/* Map each completion-queue index to the H/W queue it serves. Only the
+ * external queues of the PCI DMA engines (0, 1, 5) have completion queues.
+ */
+static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
+	[0] = GAUDI_QUEUE_ID_DMA_0_0,
+	[1] = GAUDI_QUEUE_ID_DMA_0_1,
+	[2] = GAUDI_QUEUE_ID_DMA_0_2,
+	[3] = GAUDI_QUEUE_ID_DMA_0_3,
+	[4] = GAUDI_QUEUE_ID_DMA_1_0,
+	[5] = GAUDI_QUEUE_ID_DMA_1_1,
+	[6] = GAUDI_QUEUE_ID_DMA_1_2,
+	[7] = GAUDI_QUEUE_ID_DMA_1_3,
+	[8] = GAUDI_QUEUE_ID_DMA_5_0,
+	[9] = GAUDI_QUEUE_ID_DMA_5_1,
+	[10] = GAUDI_QUEUE_ID_DMA_5_2,
+	[11] = GAUDI_QUEUE_ID_DMA_5_3
+};
+
+/* Size in bytes of every QMAN packet type, indexed by packet-id opcode.
+ * Used when walking/validating user command buffers.
+ */
+static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
+	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
+	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
+	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
+	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
+	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
+	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
+	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
+	[PACKET_FENCE]		= sizeof(struct packet_fence),
+	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
+	[PACKET_NOP]		= sizeof(struct packet_nop),
+	[PACKET_STOP]		= sizeof(struct packet_stop),
+	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
+	[PACKET_WAIT]		= sizeof(struct packet_wait),
+	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
+};
+
+/* Flat list of every async event ID the driver handles/unmasks.
+ * NOTE(review): ordering appears to follow the F/W event map (some SRAM
+ * entries are deliberately out of numeric order) — confirm against
+ * gaudi_async_ids_map.h before reordering.
+ */
+static const u32 gaudi_all_events[] = {
+	GAUDI_EVENT_PCIE_CORE_SERR,
+	GAUDI_EVENT_PCIE_CORE_DERR,
+	GAUDI_EVENT_PCIE_IF_SERR,
+	GAUDI_EVENT_PCIE_IF_DERR,
+	GAUDI_EVENT_PCIE_PHY_SERR,
+	GAUDI_EVENT_PCIE_PHY_DERR,
+	GAUDI_EVENT_TPC0_SERR,
+	GAUDI_EVENT_TPC1_SERR,
+	GAUDI_EVENT_TPC2_SERR,
+	GAUDI_EVENT_TPC3_SERR,
+	GAUDI_EVENT_TPC4_SERR,
+	GAUDI_EVENT_TPC5_SERR,
+	GAUDI_EVENT_TPC6_SERR,
+	GAUDI_EVENT_TPC7_SERR,
+	GAUDI_EVENT_TPC0_DERR,
+	GAUDI_EVENT_TPC1_DERR,
+	GAUDI_EVENT_TPC2_DERR,
+	GAUDI_EVENT_TPC3_DERR,
+	GAUDI_EVENT_TPC4_DERR,
+	GAUDI_EVENT_TPC5_DERR,
+	GAUDI_EVENT_TPC6_DERR,
+	GAUDI_EVENT_TPC7_DERR,
+	GAUDI_EVENT_MME0_ACC_SERR,
+	GAUDI_EVENT_MME0_ACC_DERR,
+	GAUDI_EVENT_MME0_SBAB_SERR,
+	GAUDI_EVENT_MME0_SBAB_DERR,
+	GAUDI_EVENT_MME1_ACC_SERR,
+	GAUDI_EVENT_MME1_ACC_DERR,
+	GAUDI_EVENT_MME1_SBAB_SERR,
+	GAUDI_EVENT_MME1_SBAB_DERR,
+	GAUDI_EVENT_MME2_ACC_SERR,
+	GAUDI_EVENT_MME2_ACC_DERR,
+	GAUDI_EVENT_MME2_SBAB_SERR,
+	GAUDI_EVENT_MME2_SBAB_DERR,
+	GAUDI_EVENT_MME3_ACC_SERR,
+	GAUDI_EVENT_MME3_ACC_DERR,
+	GAUDI_EVENT_MME3_SBAB_SERR,
+	GAUDI_EVENT_MME3_SBAB_DERR,
+	GAUDI_EVENT_DMA0_SERR_ECC,
+	GAUDI_EVENT_DMA1_SERR_ECC,
+	GAUDI_EVENT_DMA2_SERR_ECC,
+	GAUDI_EVENT_DMA3_SERR_ECC,
+	GAUDI_EVENT_DMA4_SERR_ECC,
+	GAUDI_EVENT_DMA5_SERR_ECC,
+	GAUDI_EVENT_DMA6_SERR_ECC,
+	GAUDI_EVENT_DMA7_SERR_ECC,
+	GAUDI_EVENT_DMA0_DERR_ECC,
+	GAUDI_EVENT_DMA1_DERR_ECC,
+	GAUDI_EVENT_DMA2_DERR_ECC,
+	GAUDI_EVENT_DMA3_DERR_ECC,
+	GAUDI_EVENT_DMA4_DERR_ECC,
+	GAUDI_EVENT_DMA5_DERR_ECC,
+	GAUDI_EVENT_DMA6_DERR_ECC,
+	GAUDI_EVENT_DMA7_DERR_ECC,
+	GAUDI_EVENT_CPU_IF_ECC_SERR,
+	GAUDI_EVENT_CPU_IF_ECC_DERR,
+	GAUDI_EVENT_PSOC_MEM_SERR,
+	GAUDI_EVENT_PSOC_CORESIGHT_SERR,
+	GAUDI_EVENT_PSOC_MEM_DERR,
+	GAUDI_EVENT_PSOC_CORESIGHT_DERR,
+	GAUDI_EVENT_SRAM0_SERR,
+	GAUDI_EVENT_SRAM1_SERR,
+	GAUDI_EVENT_SRAM2_SERR,
+	GAUDI_EVENT_SRAM3_SERR,
+	GAUDI_EVENT_SRAM7_SERR,
+	GAUDI_EVENT_SRAM6_SERR,
+	GAUDI_EVENT_SRAM5_SERR,
+	GAUDI_EVENT_SRAM4_SERR,
+	GAUDI_EVENT_SRAM8_SERR,
+	GAUDI_EVENT_SRAM9_SERR,
+	GAUDI_EVENT_SRAM10_SERR,
+	GAUDI_EVENT_SRAM11_SERR,
+	GAUDI_EVENT_SRAM15_SERR,
+	GAUDI_EVENT_SRAM14_SERR,
+	GAUDI_EVENT_SRAM13_SERR,
+	GAUDI_EVENT_SRAM12_SERR,
+	GAUDI_EVENT_SRAM16_SERR,
+	GAUDI_EVENT_SRAM17_SERR,
+	GAUDI_EVENT_SRAM18_SERR,
+	GAUDI_EVENT_SRAM19_SERR,
+	GAUDI_EVENT_SRAM23_SERR,
+	GAUDI_EVENT_SRAM22_SERR,
+	GAUDI_EVENT_SRAM21_SERR,
+	GAUDI_EVENT_SRAM20_SERR,
+	GAUDI_EVENT_SRAM24_SERR,
+	GAUDI_EVENT_SRAM25_SERR,
+	GAUDI_EVENT_SRAM26_SERR,
+	GAUDI_EVENT_SRAM27_SERR,
+	GAUDI_EVENT_SRAM31_SERR,
+	GAUDI_EVENT_SRAM30_SERR,
+	GAUDI_EVENT_SRAM29_SERR,
+	GAUDI_EVENT_SRAM28_SERR,
+	GAUDI_EVENT_SRAM0_DERR,
+	GAUDI_EVENT_SRAM1_DERR,
+	GAUDI_EVENT_SRAM2_DERR,
+	GAUDI_EVENT_SRAM3_DERR,
+	GAUDI_EVENT_SRAM7_DERR,
+	GAUDI_EVENT_SRAM6_DERR,
+	GAUDI_EVENT_SRAM5_DERR,
+	GAUDI_EVENT_SRAM4_DERR,
+	GAUDI_EVENT_SRAM8_DERR,
+	GAUDI_EVENT_SRAM9_DERR,
+	GAUDI_EVENT_SRAM10_DERR,
+	GAUDI_EVENT_SRAM11_DERR,
+	GAUDI_EVENT_SRAM15_DERR,
+	GAUDI_EVENT_SRAM14_DERR,
+	GAUDI_EVENT_SRAM13_DERR,
+	GAUDI_EVENT_SRAM12_DERR,
+	GAUDI_EVENT_SRAM16_DERR,
+	GAUDI_EVENT_SRAM17_DERR,
+	GAUDI_EVENT_SRAM18_DERR,
+	GAUDI_EVENT_SRAM19_DERR,
+	GAUDI_EVENT_SRAM23_DERR,
+	GAUDI_EVENT_SRAM22_DERR,
+	GAUDI_EVENT_SRAM21_DERR,
+	GAUDI_EVENT_SRAM20_DERR,
+	GAUDI_EVENT_SRAM24_DERR,
+	GAUDI_EVENT_SRAM25_DERR,
+	GAUDI_EVENT_SRAM26_DERR,
+	GAUDI_EVENT_SRAM27_DERR,
+	GAUDI_EVENT_SRAM31_DERR,
+	GAUDI_EVENT_SRAM30_DERR,
+	GAUDI_EVENT_SRAM29_DERR,
+	GAUDI_EVENT_SRAM28_DERR,
+	GAUDI_EVENT_NIC0_SERR,
+	GAUDI_EVENT_NIC1_SERR,
+	GAUDI_EVENT_NIC2_SERR,
+	GAUDI_EVENT_NIC3_SERR,
+	GAUDI_EVENT_NIC4_SERR,
+	GAUDI_EVENT_NIC0_DERR,
+	GAUDI_EVENT_NIC1_DERR,
+	GAUDI_EVENT_NIC2_DERR,
+	GAUDI_EVENT_NIC3_DERR,
+	GAUDI_EVENT_NIC4_DERR,
+	GAUDI_EVENT_DMA_IF0_SERR,
+	GAUDI_EVENT_DMA_IF1_SERR,
+	GAUDI_EVENT_DMA_IF2_SERR,
+	GAUDI_EVENT_DMA_IF3_SERR,
+	GAUDI_EVENT_DMA_IF0_DERR,
+	GAUDI_EVENT_DMA_IF1_DERR,
+	GAUDI_EVENT_DMA_IF2_DERR,
+	GAUDI_EVENT_DMA_IF3_DERR,
+	GAUDI_EVENT_GIC500,
+	GAUDI_EVENT_HBM_0_SERR,
+	GAUDI_EVENT_HBM_1_SERR,
+	GAUDI_EVENT_HBM_2_SERR,
+	GAUDI_EVENT_HBM_3_SERR,
+	GAUDI_EVENT_HBM_0_DERR,
+	GAUDI_EVENT_HBM_1_DERR,
+	GAUDI_EVENT_HBM_2_DERR,
+	GAUDI_EVENT_HBM_3_DERR,
+	GAUDI_EVENT_MMU_SERR,
+	GAUDI_EVENT_MMU_DERR,
+	GAUDI_EVENT_PCIE_DEC,
+	GAUDI_EVENT_TPC0_DEC,
+	GAUDI_EVENT_TPC1_DEC,
+	GAUDI_EVENT_TPC2_DEC,
+	GAUDI_EVENT_TPC3_DEC,
+	GAUDI_EVENT_TPC4_DEC,
+	GAUDI_EVENT_TPC5_DEC,
+	GAUDI_EVENT_TPC6_DEC,
+	GAUDI_EVENT_TPC7_DEC,
+	GAUDI_EVENT_AXI_ECC,
+	GAUDI_EVENT_L2_RAM_ECC,
+	GAUDI_EVENT_MME0_WBC_RSP,
+	GAUDI_EVENT_MME0_SBAB0_RSP,
+	GAUDI_EVENT_MME1_WBC_RSP,
+	GAUDI_EVENT_MME1_SBAB0_RSP,
+	GAUDI_EVENT_MME2_WBC_RSP,
+	GAUDI_EVENT_MME2_SBAB0_RSP,
+	GAUDI_EVENT_MME3_WBC_RSP,
+	GAUDI_EVENT_MME3_SBAB0_RSP,
+	GAUDI_EVENT_PLL0,
+	GAUDI_EVENT_PLL1,
+	GAUDI_EVENT_PLL2,
+	GAUDI_EVENT_PLL3,
+	GAUDI_EVENT_PLL4,
+	GAUDI_EVENT_PLL5,
+	GAUDI_EVENT_PLL6,
+	GAUDI_EVENT_PLL7,
+	GAUDI_EVENT_PLL8,
+	GAUDI_EVENT_PLL9,
+	GAUDI_EVENT_PLL10,
+	GAUDI_EVENT_PLL11,
+	GAUDI_EVENT_PLL12,
+	GAUDI_EVENT_PLL13,
+	GAUDI_EVENT_PLL14,
+	GAUDI_EVENT_PLL15,
+	GAUDI_EVENT_PLL16,
+	GAUDI_EVENT_PLL17,
+	GAUDI_EVENT_CPU_AXI_SPLITTER,
+	GAUDI_EVENT_PSOC_AXI_DEC,
+	GAUDI_EVENT_PSOC_PRSTN_FALL,
+	GAUDI_EVENT_TPC0_BMON_SPMU,
+	GAUDI_EVENT_TPC0_KRN_ERR,
+	GAUDI_EVENT_TPC1_BMON_SPMU,
+	GAUDI_EVENT_TPC1_KRN_ERR,
+	GAUDI_EVENT_TPC2_BMON_SPMU,
+	GAUDI_EVENT_TPC2_KRN_ERR,
+	GAUDI_EVENT_TPC3_BMON_SPMU,
+	GAUDI_EVENT_TPC3_KRN_ERR,
+	GAUDI_EVENT_TPC4_BMON_SPMU,
+	GAUDI_EVENT_TPC4_KRN_ERR,
+	GAUDI_EVENT_TPC5_BMON_SPMU,
+	GAUDI_EVENT_TPC5_KRN_ERR,
+	GAUDI_EVENT_TPC6_BMON_SPMU,
+	GAUDI_EVENT_TPC6_KRN_ERR,
+	GAUDI_EVENT_TPC7_BMON_SPMU,
+	GAUDI_EVENT_TPC7_KRN_ERR,
+	GAUDI_EVENT_MMU_PAGE_FAULT,
+	GAUDI_EVENT_MMU_WR_PERM,
+	GAUDI_EVENT_DMA_BM_CH0,
+	GAUDI_EVENT_DMA_BM_CH1,
+	GAUDI_EVENT_DMA_BM_CH2,
+	GAUDI_EVENT_DMA_BM_CH3,
+	GAUDI_EVENT_DMA_BM_CH4,
+	GAUDI_EVENT_DMA_BM_CH5,
+	GAUDI_EVENT_DMA_BM_CH6,
+	GAUDI_EVENT_DMA_BM_CH7,
+	GAUDI_EVENT_HBM0_SPI_0,
+	GAUDI_EVENT_HBM0_SPI_1,
+	GAUDI_EVENT_HBM1_SPI_0,
+	GAUDI_EVENT_HBM1_SPI_1,
+	GAUDI_EVENT_HBM2_SPI_0,
+	GAUDI_EVENT_HBM2_SPI_1,
+	GAUDI_EVENT_HBM3_SPI_0,
+	GAUDI_EVENT_HBM3_SPI_1,
+	GAUDI_EVENT_RAZWI_OR_ADC,
+	GAUDI_EVENT_TPC0_QM,
+	GAUDI_EVENT_TPC1_QM,
+	GAUDI_EVENT_TPC2_QM,
+	GAUDI_EVENT_TPC3_QM,
+	GAUDI_EVENT_TPC4_QM,
+	GAUDI_EVENT_TPC5_QM,
+	GAUDI_EVENT_TPC6_QM,
+	GAUDI_EVENT_TPC7_QM,
+	GAUDI_EVENT_MME0_QM,
+	GAUDI_EVENT_MME2_QM,
+	GAUDI_EVENT_DMA0_QM,
+	GAUDI_EVENT_DMA1_QM,
+	GAUDI_EVENT_DMA2_QM,
+	GAUDI_EVENT_DMA3_QM,
+	GAUDI_EVENT_DMA4_QM,
+	GAUDI_EVENT_DMA5_QM,
+	GAUDI_EVENT_DMA6_QM,
+	GAUDI_EVENT_DMA7_QM,
+	GAUDI_EVENT_NIC0_QM0,
+	GAUDI_EVENT_NIC0_QM1,
+	GAUDI_EVENT_NIC1_QM0,
+	GAUDI_EVENT_NIC1_QM1,
+	GAUDI_EVENT_NIC2_QM0,
+	GAUDI_EVENT_NIC2_QM1,
+	GAUDI_EVENT_NIC3_QM0,
+	GAUDI_EVENT_NIC3_QM1,
+	GAUDI_EVENT_NIC4_QM0,
+	GAUDI_EVENT_NIC4_QM1,
+	GAUDI_EVENT_DMA0_CORE,
+	GAUDI_EVENT_DMA1_CORE,
+	GAUDI_EVENT_DMA2_CORE,
+	GAUDI_EVENT_DMA3_CORE,
+	GAUDI_EVENT_DMA4_CORE,
+	GAUDI_EVENT_DMA5_CORE,
+	GAUDI_EVENT_DMA6_CORE,
+	GAUDI_EVENT_DMA7_CORE,
+	GAUDI_EVENT_FIX_POWER_ENV_S,
+	GAUDI_EVENT_FIX_POWER_ENV_E,
+	GAUDI_EVENT_FIX_THERMAL_ENV_S,
+	GAUDI_EVENT_FIX_THERMAL_ENV_E,
+	GAUDI_EVENT_RAZWI_OR_ADC_SW
+};
+
+/* Cause strings for TPC interrupts, indexed by bit position in the TPC
+ * interrupt-cause register (GAUDI_NUM_OF_TPC_INTR_CAUSE bits).
+ */
+static const char * const
+gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
+	"tpc_address_exceed_slm",
+	"tpc_div_by_0",
+	"tpc_spu_mac_overflow",
+	"tpc_spu_addsub_overflow",
+	"tpc_spu_abs_overflow",
+	"tpc_spu_fp_dst_nan_inf",
+	"tpc_spu_fp_dst_denorm",
+	"tpc_vpu_mac_overflow",
+	"tpc_vpu_addsub_overflow",
+	"tpc_vpu_abs_overflow",
+	"tpc_vpu_fp_dst_nan_inf",
+	"tpc_vpu_fp_dst_denorm",
+	"tpc_assertions",
+	"tpc_illegal_instruction",
+	"tpc_pc_wrap_around",
+	"tpc_qm_sw_err",
+	"tpc_hbw_rresp_err",
+	"tpc_hbw_bresp_err",
+	"tpc_lbw_rresp_err",
+	"tpc_lbw_bresp_err"
+};
+
+/* Cause strings for QMAN errors, indexed by bit position in the QM
+ * error-cause register (GAUDI_NUM_OF_QM_ERR_CAUSE bits).
+ */
+static const char * const
+gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
+	"PQ AXI HBW error",
+	"CQ AXI HBW error",
+	"CP AXI HBW error",
+	"CP error due to undefined OPCODE",
+	"CP encountered STOP OPCODE",
+	"CP AXI LBW error",
+	"CP WRREG32 or WRBULK returned error",
+	"N/A",
+	"FENCE 0 inc over max value and clipped",
+	"FENCE 1 inc over max value and clipped",
+	"FENCE 2 inc over max value and clipped",
+	"FENCE 3 inc over max value and clipped",
+	"FENCE 0 dec under min value and clipped",
+	"FENCE 1 dec under min value and clipped",
+	"FENCE 2 dec under min value and clipped",
+	"FENCE 3 dec under min value and clipped"
+};
+
+/* Cause strings for QMAN arbiter errors, indexed by bit position
+ * (GAUDI_NUM_OF_QM_ARB_ERR_CAUSE bits).
+ */
+static const char * const
+gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
+	"Choice push while full error",
+	"Choice Q watchdog error",
+	"MSG AXI LBW returned with error"
+};
+
+/* Type of every H/W queue, indexed by queue ID:
+ * EXT - external queue, submitted to by user-space through the driver
+ *       (PCI DMA engines 0, 1, 5);
+ * INT - internal queue, used directly by the engines;
+ * CPU - the single driver<->device-CPU queue;
+ * NA  - not available (NIC queues are not exposed in this version).
+ * gaudi_get_fixed_properties() translates this table into hw_queues_props.
+ */
+static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
+	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
+	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
+	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
+	QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
+};
+
+static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
+ u64 phys_addr);
+static int gaudi_send_job_on_qman0(struct hl_device *hdev,
+ struct hl_cs_job *job);
+static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
+ u32 size, u64 val);
+static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
+ u32 tpc_id);
+static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
+static int gaudi_armcp_info_get(struct hl_device *hdev);
+static void gaudi_disable_clock_gating(struct hl_device *hdev);
+static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
+
+/*
+ * gaudi_get_fixed_properties() - Fill the compile-time fixed ASIC properties.
+ * @hdev: pointer to hl_device structure.
+ *
+ * Populates hdev->asic_prop: queue descriptors, DRAM/SRAM ranges, MMU
+ * geometry (PMMU / huge-page PMMU / DMMU), CB pool sizes and defaults.
+ * Called early, before any H/W access.
+ *
+ * Return: 0 for success, -EFAULT if the queue table does not fit.
+ */
+static int gaudi_get_fixed_properties(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int i;
+
+	/* Sanity: the per-ASIC queue table must fit in the common array */
+	if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
+		dev_err(hdev->dev,
+			"Number of H/W queues must be smaller than %d\n",
+			HL_MAX_QUEUES);
+		return -EFAULT;
+	}
+
+	/* Translate the static gaudi_queue_type[] table into the generic
+	 * hw_queues_props descriptors used by the common code.
+	 */
+	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
+		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
+			prop->hw_queues_props[i].driver_only = 0;
+			prop->hw_queues_props[i].requires_kernel_cb = 1;
+		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
+			prop->hw_queues_props[i].driver_only = 1;
+			prop->hw_queues_props[i].requires_kernel_cb = 0;
+		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
+			prop->hw_queues_props[i].driver_only = 0;
+			prop->hw_queues_props[i].requires_kernel_cb = 0;
+		} else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
+			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
+			prop->hw_queues_props[i].driver_only = 0;
+			prop->hw_queues_props[i].requires_kernel_cb = 0;
+		}
+	}
+
+	/* Mark the remaining, unused, entries as not available */
+	for (; i < HL_MAX_QUEUES; i++)
+		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
+
+	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
+
+	/* DRAM (HBM) and SRAM address ranges */
+	prop->dram_base_address = DRAM_PHYS_BASE;
+	prop->dram_size = GAUDI_HBM_SIZE_32GB;
+	prop->dram_end_address = prop->dram_base_address +
+					prop->dram_size;
+	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
+
+	prop->sram_base_address = SRAM_BASE_ADDR;
+	prop->sram_size = SRAM_SIZE;
+	prop->sram_end_address = prop->sram_base_address +
+					prop->sram_size;
+	prop->sram_user_base_address = prop->sram_base_address +
+					SRAM_USER_BASE_OFFSET;
+
+	/* MMU page-table geometry; a smaller table on Palladium (pldm) */
+	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
+	if (hdev->pldm)
+		prop->mmu_pgt_size = 0x800000; /* 8MB */
+	else
+		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
+	prop->mmu_pte_size = HL_PTE_SIZE;
+	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
+	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
+	prop->dram_page_size = PAGE_SIZE_2MB;
+
+	/* PMMU (host-memory MMU): 4KB pages over the lower half of the
+	 * host VA space.
+	 */
+	prop->pmmu.hop0_shift = HOP0_SHIFT;
+	prop->pmmu.hop1_shift = HOP1_SHIFT;
+	prop->pmmu.hop2_shift = HOP2_SHIFT;
+	prop->pmmu.hop3_shift = HOP3_SHIFT;
+	prop->pmmu.hop4_shift = HOP4_SHIFT;
+	prop->pmmu.hop0_mask = HOP0_MASK;
+	prop->pmmu.hop1_mask = HOP1_MASK;
+	prop->pmmu.hop2_mask = HOP2_MASK;
+	prop->pmmu.hop3_mask = HOP3_MASK;
+	prop->pmmu.hop4_mask = HOP4_MASK;
+	prop->pmmu.start_addr = VA_HOST_SPACE_START;
+	prop->pmmu.end_addr =
+			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
+	prop->pmmu.page_size = PAGE_SIZE_4KB;
+
+	/* PMMU and HPMMU are the same except of page size */
+	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
+	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
+
+	/* shifts and masks are the same in PMMU and DMMU;
+	 * DMMU takes the upper half of the host VA space, 2MB pages.
+	 */
+	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
+	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
+	prop->dmmu.end_addr = VA_HOST_SPACE_END;
+	prop->dmmu.page_size = PAGE_SIZE_2MB;
+
+	prop->cfg_size = CFG_SIZE;
+	prop->max_asid = MAX_ASID;
+	prop->num_of_events = GAUDI_EVENT_SIZE;
+	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
+
+	prop->max_power_default = MAX_POWER_DEFAULT;
+
+	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
+	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
+
+	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
+	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
+
+	/* Default card name until the real one is fetched from ArmCP.
+	 * NOTE(review): strncpy() does not guarantee NUL-termination if the
+	 * default name fills CARD_NAME_MAX_LEN - presumably the default is
+	 * shorter; verify against gaudiP.h.
+	 */
+	strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
+					CARD_NAME_MAX_LEN);
+
+	return 0;
+}
+
+/*
+ * gaudi_pci_bars_map() - Map the three PCI BARs.
+ * @hdev: pointer to hl_device structure.
+ *
+ * Maps SRAM and CFG BARs as uncached and the HBM BAR as write-combined,
+ * then computes the rmmio base inside the CFG BAR.
+ *
+ * Return: 0 for success, negative value on mapping failure.
+ */
+static int gaudi_pci_bars_map(struct hl_device *hdev)
+{
+	static const char * const name[] = {"SRAM", "CFG", "HBM"};
+	bool is_wc[3] = {false, false, true};
+	int rc;
+
+	rc = hl_pci_bars_map(hdev, name, is_wc);
+	if (rc)
+		return rc;
+
+	/* CFG BAR starts at the SPI flash; registers are offset from it */
+	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
+			(CFG_BASE - SPI_FLASH_BASE_ADDR);
+
+	return 0;
+}
+
+/*
+ * gaudi_set_hbm_bar_base() - Retarget the HBM BAR window to a new address.
+ * @hdev: pointer to hl_device structure.
+ * @addr: device address the BAR should point to.
+ *
+ * Return: the previous BAR base address on success (so the caller can
+ * restore it), U64_MAX on failure. The gaudi pointer may still be NULL
+ * when this is called during early init, hence the NULL checks.
+ */
+static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
+{
+	struct gaudi_device *gaudi = hdev->asic_specific;
+	u64 old_addr = addr;
+	int rc;
+
+	/* Skip the expensive iATU reconfiguration if already pointing there */
+	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
+		return old_addr;
+
+	/* Inbound Region 2 - Bar 4 - Point to HBM */
+	rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
+	if (rc)
+		return U64_MAX;
+
+	if (gaudi) {
+		old_addr = gaudi->hbm_bar_cur_addr;
+		gaudi->hbm_bar_cur_addr = addr;
+	}
+
+	return old_addr;
+}
+
+/*
+ * gaudi_init_iatu() - Configure the PCIe controller's inbound/outbound iATU.
+ * @hdev: pointer to hl_device structure.
+ *
+ * Programs inbound region 1 (BAR 2) to the SPI flash, then delegates the
+ * standard SRAM/DRAM/host regions to the common hl_pci_init_iatu().
+ *
+ * Return: 0 for success, -EIO if any iATU register write fails.
+ */
+static int gaudi_init_iatu(struct hl_device *hdev)
+{
+	int rc = 0;
+
+	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
+	rc  = hl_pci_iatu_write(hdev, 0x314,
+				lower_32_bits(SPI_FLASH_BASE_ADDR));
+	rc |= hl_pci_iatu_write(hdev, 0x318,
+				upper_32_bits(SPI_FLASH_BASE_ADDR));
+	rc |= hl_pci_iatu_write(hdev, 0x300, 0);
+	/* Enable + Bar match + match enable */
+	rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
+
+	if (rc)
+		return -EIO;
+
+	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
+				HOST_PHYS_BASE, HOST_PHYS_SIZE);
+}
+
+/*
+ * gaudi_early_init() - Early initialization: properties and PCI bring-up.
+ * @hdev: pointer to hl_device structure.
+ *
+ * Fills the fixed properties, validates the SRAM/CFG BAR sizes as a sanity
+ * check that this is really a Gaudi device, records the HBM BAR size and
+ * initializes the common PCI layer.
+ *
+ * Return: 0 for success, -ENODEV on BAR-size mismatch, other negative
+ * value on failure.
+ */
+static int gaudi_early_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct pci_dev *pdev = hdev->pdev;
+	int rc;
+
+	rc = gaudi_get_fixed_properties(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get fixed properties\n");
+		return rc;
+	}
+
+	/* Check BAR sizes */
+	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
+		dev_err(hdev->dev,
+			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
+			SRAM_BAR_ID,
+			(unsigned long long) pci_resource_len(pdev,
+							SRAM_BAR_ID),
+			SRAM_BAR_SIZE);
+		return -ENODEV;
+	}
+
+	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
+		dev_err(hdev->dev,
+			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
+			CFG_BAR_ID,
+			(unsigned long long) pci_resource_len(pdev,
+							CFG_BAR_ID),
+			CFG_BAR_SIZE);
+		return -ENODEV;
+	}
+
+	/* HBM BAR size varies with the host BIOS, so it is read, not checked */
+	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
+
+	rc = hl_pci_init(hdev);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+/*
+ * gaudi_early_fini() - Tear down what gaudi_early_init() set up.
+ * @hdev: pointer to hl_device structure.
+ *
+ * Return: always 0.
+ */
+static int gaudi_early_fini(struct hl_device *hdev)
+{
+	hl_pci_fini(hdev);
+
+	return 0;
+}
+
+/**
+ * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	/* Cache the PCI PLL dividers (nr/nf/od/div) so the PSOC frequency
+	 * can be derived later without further register reads.
+	 */
+	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
+	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
+	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
+	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+}
+
+/*
+ * _gaudi_init_tpc_mem() - DMA the TPC kernel to SRAM and run it on all TPCs.
+ * @hdev: pointer to hl_device structure.
+ * @tpc_kernel_src_addr: DMA address of the TPC kernel image on the host.
+ * @tpc_kernel_size: size of the TPC kernel image in bytes.
+ *
+ * Builds a single LIN_DMA packet in a kernel CB, submits it on QMAN0 to
+ * copy the image into the SRAM user area, then runs the kernel on every
+ * TPC engine.
+ *
+ * Return: 0 for success, negative value for error.
+ */
+static int _gaudi_init_tpc_mem(struct hl_device *hdev,
+		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct packet_lin_dma *init_tpc_mem_pkt;
+	struct hl_cs_job *job;
+	struct hl_cb *cb;
+	u64 dst_addr;
+	u32 cb_size, ctl;
+	u8 tpc_id;
+	int rc;
+
+	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	if (!cb)
+		return -EFAULT;
+
+	init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
+					cb->kernel_address;
+	cb_size = sizeof(*init_tpc_mem_pkt);
+	memset(init_tpc_mem_pkt, 0, cb_size);
+
+	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
+
+	/* Linear DMA, barriers set so the copy completes before the next
+	 * packet executes.
+	 */
+	ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
+			(1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
+			(1 << GAUDI_PKT_CTL_RB_SHIFT) |
+			(1 << GAUDI_PKT_CTL_MB_SHIFT));
+
+	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
+
+	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
+	dst_addr = (prop->sram_user_base_address &
+			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
+			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
+	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
+
+	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+	if (!job) {
+		dev_err(hdev->dev, "Failed to allocate a new job\n");
+		rc = -ENOMEM;
+		goto release_cb;
+	}
+
+	job->id = 0;
+	job->user_cb = cb;
+	job->user_cb->cs_cnt++;
+	job->user_cb_size = cb_size;
+	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
+	job->patched_cb = job->user_cb;
+	/* Room for the MSG_PROT packet gaudi_send_job_on_qman0() appends */
+	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
+
+	hl_debugfs_add_job(hdev, job);
+
+	rc = gaudi_send_job_on_qman0(hdev, job);
+
+	if (rc)
+		goto free_job;
+
+	/* The image is now in SRAM - run it on every TPC engine */
+	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
+		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
+		if (rc)
+			break;
+	}
+
+free_job:
+	hl_userptr_delete_list(hdev, &job->userptr_list);
+	hl_debugfs_remove_job(hdev, job);
+	kfree(job);
+	cb->cs_cnt--;
+
+release_cb:
+	hl_cb_put(cb);
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+	return rc;
+}
+
+/*
+ * gaudi_init_tpc_mem() - Initialize TPC memories.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
+ *
+ * Return: 0 for success, negative value for error.
+ */
+static int gaudi_init_tpc_mem(struct hl_device *hdev)
+{
+	const struct firmware *fw;
+	size_t fw_size;
+	void *cpu_addr;
+	dma_addr_t dma_handle;
+	int rc;
+
+	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
+	if (rc) {
+		dev_err(hdev->dev, "Firmware file %s is not found!\n",
+				GAUDI_TPC_FW_FILE);
+		goto out;
+	}
+
+	/* Stage the F/W image in DMA-coherent memory so the device's DMA
+	 * engine can copy it to SRAM.
+	 */
+	fw_size = fw->size;
+	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
+			&dma_handle, GFP_KERNEL | __GFP_ZERO);
+	if (!cpu_addr) {
+		dev_err(hdev->dev,
+			"Failed to allocate %zu of dma memory for TPC kernel\n",
+			fw_size);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	memcpy(cpu_addr, fw->data, fw_size);
+
+	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
+
+	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
+			dma_handle);
+
+out:
+	/* Safe even on request_firmware() failure (fw is NULL then) */
+	release_firmware(fw);
+	return rc;
+}
+
+/*
+ * gaudi_late_init() - Late initialization that requires F/W communication.
+ * @hdev: pointer to hl_device structure.
+ *
+ * Fetches ArmCP info, enables PCI access from the device CPU, registers
+ * the event-interrupt target, caches the PSOC PLL values, scrubs the MMU
+ * page-tables area and loads the TPC kernels. On any failure after PCI
+ * access was granted, it is revoked again.
+ *
+ * Return: 0 for success, negative value for error.
+ */
+static int gaudi_late_init(struct hl_device *hdev)
+{
+	int rc;
+
+	/* armcp info is fetched via the file-static helper, not through a
+	 * member of struct gaudi_device (there is no such function pointer)
+	 */
+	rc = gaudi_armcp_info_get(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to get armcp info\n");
+		return rc;
+	}
+
+	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+		return rc;
+	}
+
+	/* Point the GIC to the register the driver uses to raise events */
+	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
+
+	gaudi_fetch_psoc_frequency(hdev);
+
+	rc = gaudi_mmu_clear_pgt_range(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
+		goto disable_pci_access;
+	}
+
+	rc = gaudi_init_tpc_mem(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
+		goto disable_pci_access;
+	}
+
+	return 0;
+
+disable_pci_access:
+	hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+
+	return rc;
+}
+
+/*
+ * gaudi_late_fini() - Free the hwmon channel-info array built at late init.
+ * @hdev: pointer to hl_device structure.
+ *
+ * The array is NULL-terminated; each entry's config array is freed first,
+ * then the entry, then the array itself. Safe to call when nothing was
+ * allocated (info is NULL).
+ */
+static void gaudi_late_fini(struct hl_device *hdev)
+{
+	const struct hwmon_channel_info **channel_info_arr;
+	int i = 0;
+
+	if (!hdev->hl_chip_info->info)
+		return;
+
+	channel_info_arr = hdev->hl_chip_info->info;
+
+	while (channel_info_arr[i]) {
+		kfree(channel_info_arr[i]->config);
+		kfree(channel_info_arr[i]);
+		i++;
+	}
+
+	kfree(channel_info_arr);
+
+	/* Prevent double free on a repeated fini */
+	hdev->hl_chip_info->info = NULL;
+}
+
+static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
+{
+ dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
+ void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
+ int i, j, rc = 0;
+
+ /*
+ * The device CPU works with 40-bits addresses, while bit 39 must be set
+ * to '1' when accessing the host.
+ * Bits 49:39 of the full host address are saved for a later
+ * configuration of the HW to perform extension to 50 bits.
+ * Because there is a single HW register that holds the extension bits,
+ * these bits must be identical in all allocated range.
+ */
+
+ for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
+ virt_addr_arr[i] =
+ hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+ HL_CPU_ACCESSIBLE_MEM_SIZE,
+ &dma_addr_arr[i],
+ GFP_KERNEL | __GFP_ZERO);
+ if (!virt_addr_arr[i]) {
+ rc = -ENOMEM;
+ goto free_dma_mem_arr;
+ }
+
+ end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
+ if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
+ GAUDI_CPU_PCI_MSB_ADDR(end_addr))
+ break;
+ }
+
+ if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
+ dev_err(hdev->dev,
+ "MSB of CPU accessible DMA memory are not identical in all range\n");
+ rc = -EFAULT;
+ goto free_dma_mem_arr;
+ }
+
+ hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
+ hdev->cpu_accessible_dma_address = dma_addr_arr[i];
+ hdev->cpu_pci_msb_addr =
+ GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
+
+ GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
+
+free_dma_mem_arr:
+ for (j = 0 ; j < i ; j++)
+ hdev->asic_funcs->asic_dma_free_coherent(hdev,
+ HL_CPU_ACCESSIBLE_MEM_SIZE,
+ virt_addr_arr[j],
+ dma_addr_arr[j]);
+
+ return r