Merge tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes: This tag contains the following changes for kernel 5.9-rc1: - Remove rate limiters from GAUDI configuration (no longer needed). - Set maximum amount of in-flight CS per ASIC type and increase the maximum amount for GAUDI. - Refactor signal/wait command submissions code - Calculate trace frequency from PLLs to show accurate profiling data - Rephrase error messages to make them more clear to the common user - Add statistics of dropped CS (counter per possible reason for drop) - Get ECC information from firmware - Remove support for partial SoC reset in Gaudi - Halt device CPU only when reset is certain to happen. Sometimes we abort the reset procedure and in that case we can't leave device CPU in halt mode. - set each CQ to its own work queue to prevent a race between completions on different CQs. - Use queue pi/ci in order to determine queue occupancy. This is done to make the code reusable between current and future ASICs. - Add more validations for user inputs. - Refactor PCIe controller configuration to make the code reusable between current and future ASICs. - Update firmware interface headers to latest version - Move all common code to a dedicated common sub-folder * tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux: (28 commits) habanalabs: Fix memory leak in error flow of context initialization habanalabs: use no flags on MMU cache invalidation habanalabs: enable device before hw_init() habanalabs: create internal CB pool habanalabs: update hl_boot_if.h from firmware habanalabs: create common folder habanalabs: check for DMA errors when clearing memory habanalabs: verify queue can contain all cs jobs habanalabs: Assign each CQ with its own work queue habanalabs: halt device CPU only upon certain reset habanalabs: remove unused hash habanalabs: use queue pi/ci in order to determine queue occupancy habanalabs: configure maximum queues per asic habanalabs: remove soft-reset support from GAUDI habanalabs: PCIe iATU refactoring habanalabs: Extract ECC information from FW habanalabs: Add dropped cs statistics info struct habanalabs: extract cpu boot status lookup habanalabs: rephrase error messages habanalabs: Increase queues depth ...
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2020-07-24 20:22:25 +0200
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2020-07-24 20:22:25 +0200
commit: 860e73b49cd933c708e3e1e1e07cdea81b6acd1c (patch)
tree: d79deee0e36096f56cadcb9abb0be9864bac9e7c
parent: 54918b8ed1e52ae5f2a521bdf014e4fba797c920 (diff)
parent: 94f8be9eb065412cf069efd45053d33e8911fa9e (diff)
34 files changed, 1260 insertions, 992 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 421ebd903069..a786c0a7de9a 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -3,16 +3,15 @@
 # Makefile for HabanaLabs AI accelerators driver
 #
 
-obj-m	:= habanalabs.o
+obj-$(CONFIG_HABANA_AI) := habanalabs.o
 
-habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
-		command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
-		command_submission.o mmu.o firmware_if.o pci.o
-
-habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
+include $(src)/common/Makefile
+habanalabs-y += $(HL_COMMON_FILES)
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
 
 include $(src)/gaudi/Makefile
 habanalabs-y += $(HL_GAUDI_FILES)
+
+habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
new file mode 100644
index 000000000000..97d03b5c8683
--- /dev/null
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/common
+
+HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
+		common/asid.o common/habanalabs_ioctl.o \
+		common/command_buffer.o common/hw_queue.o common/irq.o \
+		common/sysfs.o common/hwmon.o common/memory.o \
+		common/command_submission.o common/mmu.o common/firmware_if.o \
+		common/pci.o
diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/common/asid.c
index a2fdf31cf27c..a2fdf31cf27c 100644
--- a/drivers/misc/habanalabs/asid.c
+++ b/drivers/misc/habanalabs/common/asid.c
diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 02d13f71b1df..7c38c4f7f9c0 100644
--- a/drivers/misc/habanalabs/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -10,12 +10,18 @@
 
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/genalloc.h>
 
 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
 {
-	hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
-			(void *) (uintptr_t) cb->kernel_address,
-			cb->bus_address);
+	if (cb->is_internal)
+		gen_pool_free(hdev->internal_cb_pool,
+				cb->kernel_address, cb->size);
+	else
+		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
+				(void *) (uintptr_t) cb->kernel_address,
+				cb->bus_address);
+
 	kfree(cb);
 }
 
@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
 }
 
 static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
-					int ctx_id)
+					int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
+	u32 cb_offset;
 	void *p;
 
 	/*
@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 	if (!cb)
 		return NULL;
 
-	if (ctx_id == HL_KERNEL_ASID_ID)
+	if (internal_cb) {
+		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
+		if (!p) {
+			kfree(cb);
+			return NULL;
+		}
+
+		cb_offset = p - hdev->internal_cb_pool_virt_addr;
+		cb->is_internal = true;
+		cb->bus_address =  hdev->internal_cb_va_base + cb_offset;
+	} else if (ctx_id == HL_KERNEL_ASID_ID) {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address, GFP_ATOMIC);
-	else
+	} else {
 		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
 						&cb->bus_address,
 						GFP_USER | __GFP_ZERO);
+	}
+
 	if (!p) {
 		dev_err(hdev->dev,
 			"failed to allocate %d of dma memory for CB\n",
@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
 }
 
 int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
-			u32 cb_size, u64 *handle, int ctx_id)
+			u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
 {
 	struct hl_cb *cb;
 	bool alloc_new_cb = true;
@@ -112,28 +131,30 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 		goto out_err;
 	}
 
-	/* Minimum allocation must be PAGE SIZE */
-	if (cb_size < PAGE_SIZE)
-		cb_size = PAGE_SIZE;
-
-	if (ctx_id == HL_KERNEL_ASID_ID &&
-			cb_size <= hdev->asic_prop.cb_pool_cb_size) {
-
-		spin_lock(&hdev->cb_pool_lock);
-		if (!list_empty(&hdev->cb_pool)) {
-			cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
-					pool_list);
-			list_del(&cb->pool_list);
-			spin_unlock(&hdev->cb_pool_lock);
-			alloc_new_cb = false;
-		} else {
-			spin_unlock(&hdev->cb_pool_lock);
-			dev_dbg(hdev->dev, "CB pool is empty\n");
+	if (!internal_cb) {
+		/* Minimum allocation must be PAGE SIZE */
+		if (cb_size < PAGE_SIZE)
+			cb_size = PAGE_SIZE;
+
+		if (ctx_id == HL_KERNEL_ASID_ID &&
+				cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+
+			spin_lock(&hdev->cb_pool_lock);
+			if (!list_empty(&hdev->cb_pool)) {
+				cb = list_first_entry(&hdev->cb_pool,
+						typeof(*cb), pool_list);
+				list_del(&cb->pool_list);
+				spin_unlock(&hdev->cb_pool_lock);
+				alloc_new_cb = false;
+			} else {
+				spin_unlock(&hdev->cb_pool_lock);
+				dev_dbg(hdev->dev, "CB pool is empty\n");
+			}
 		}
 	}
 
 	if (alloc_new_cb) {
-		cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
 		if (!cb) {
 			rc = -ENOMEM;
 			goto out_err;
@@ -229,8 +250,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
 			rc = -EINVAL;
 		} else {
 			rc = hl_cb_create(hdev, &hpriv->cb_mgr,
-						args->in.cb_size, &handle,
-						hpriv->ctx->asid);
+					args->in.cb_size, &handle,
+					hpriv->ctx->asid, false);
 		}
 
 		memset(args, 0, sizeof(*args));
@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
 	idr_destroy(&mgr->cb_handles);
 }
 
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+					bool internal_cb)
 {
 	u64 cb_handle;
 	struct hl_cb *cb;
 	int rc;
 
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
-			HL_KERNEL_ASID_ID);
+			HL_KERNEL_ASID_ID, internal_cb);
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)
 
 	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
 		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
-				HL_KERNEL_ASID_ID);
+				HL_KERNEL_ASID_ID, false);
 		if (cb) {
 			cb->is_pool = true;
 			list_add(&cb->pool_list, &hdev->cb_pool);
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b0f62cbbdc87..e096532c0e48 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -246,6 +246,18 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 	kfree(job);
 }
 
+static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+	hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
+			ctx->cs_counters.device_in_reset_drop_cnt;
+	hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
+			ctx->cs_counters.out_of_mem_drop_cnt;
+	hdev->aggregated_cs_counters.parsing_drop_cnt +=
+			ctx->cs_counters.parsing_drop_cnt;
+	hdev->aggregated_cs_counters.queue_full_drop_cnt +=
+			ctx->cs_counters.queue_full_drop_cnt;
+}
+
 static void cs_do_release(struct kref *ref)
 {
 	struct hl_cs *cs = container_of(ref, struct hl_cs,
@@ -349,6 +361,9 @@ static void cs_do_release(struct kref *ref)
 	dma_fence_signal(cs->fence);
 	dma_fence_put(cs->fence);
 
+	cs_counters_aggregate(hdev, cs->ctx);
+
+	kfree(cs->jobs_in_queue_cnt);
 	kfree(cs);
 }
 
@@ -373,9 +388,9 @@ static void cs_timedout(struct work_struct *work)
 	hdev = cs->ctx->hdev;
 	ctx_asid = cs->ctx->asid;
 
-	/* TODO: add information about last signaled seq and last emitted seq */
-	dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
-		ctx_asid, cs->sequence);
+	dev_err(hdev->dev,
+		"Command submission %llu has not finished in time!\n",
+		cs->sequence);
 
 	cs_put(cs);
 
@@ -418,21 +433,29 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	spin_lock(&ctx->cs_lock);
 
 	cs_cmpl->cs_seq = ctx->cs_sequence;
-	other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
+	other = ctx->cs_pending[cs_cmpl->cs_seq &
+				(hdev->asic_prop.max_pending_cs - 1)];
 	if ((other) && (!dma_fence_is_signaled(other))) {
-		spin_unlock(&ctx->cs_lock);
 		dev_dbg(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
 		rc = -EAGAIN;
 		goto free_fence;
 	}
 
+	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+	if (!cs->jobs_in_queue_cnt) {
+		rc = -ENOMEM;
+		goto free_fence;
+	}
+
 	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
 			ctx->asid, ctx->cs_sequence);
 
 	cs->sequence = cs_cmpl->cs_seq;
 
-	ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
+	ctx->cs_pending[cs_cmpl->cs_seq &
+			(hdev->asic_prop.max_pending_cs - 1)] =
 							&cs_cmpl->base_fence;
 	ctx->cs_sequence++;
 
@@ -447,6 +470,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	return 0;
 
 free_fence:
+	spin_unlock(&ctx->cs_lock);
 	kfree(cs_cmpl);
 free_cs:
 	kfree(cs);
@@ -463,10 +487,12 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 
 void hl_cs_rollback_all(struct hl_device *hdev)
 {
+	int i;
 	struct hl_cs *cs, *tmp;
 
 	/* flush all completions */
-	flush_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		flush_workqueue(hdev->cq_wq[i]);
 
 	/* Make sure we don't have leftovers in the H/W queues mirror list */
 	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
@@ -499,10 +525,18 @@ static int validate_queue_index(struct hl_device *hdev,
 	struct asic_fixed_properties *asic = &hdev->asic_prop;
 	struct hw_queue_properties *hw_queue_prop;
 
+	/* This must be checked here to prevent out-of-bounds access to
+	 * hw_queues_props array
+	 */
+	if (chunk->queue_index >= asic->max_queues) {
+		dev_err(hdev->dev, "Queue index %d is invalid\n",
+			chunk->queue_index);
+		return -EINVAL;
+	}
+
 	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
 
-	if ((chunk->queue_index >= HL_MAX_QUEUES) ||
-			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
+	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n",
 			chunk->queue_index);
 		return -EINVAL;
@@ -630,12 +664,15 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = validate_queue_index(hdev, chunk, &queue_type,
 						&is_kernel_allocated_cb);
-		if (rc)
+		if (rc) {
+			hpriv->ctx->cs_counters.parsing_drop_cnt++;
 			goto free_cs_object;
+		}
 
 		if (is_kernel_allocated_cb) {
 			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
+				hpriv->ctx->cs_counters.parsing_drop_cnt++;
 				rc = -EINVAL;
 				goto free_cs_object;
 			}
@@ -649,6 +686,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		job = hl_cs_allocate_job(hdev, queue_type,
 						is_kernel_allocated_cb);
 		if (!job) {
+			hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
 			if (is_kernel_allocated_cb)
@@ -681,6 +719,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
 		rc = cs_parser(hpriv, job);
 		if (rc) {
+			hpriv->ctx->cs_counters.parsing_drop_cnt++;
 			dev_err(hdev->dev,
 				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
 				cs->ctx->asid, cs->sequence, job->id, rc);
@@ -689,6 +728,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	}
 
 	if (int_queues_only) {
+		hpriv->ctx->cs_counters.parsing_drop_cnt++;
 		dev_err(hdev->dev,
 			"Reject CS %d.%llu because only internal queues jobs are present\n",
 			cs->ctx->asid, cs->sequence);
@@ -738,6 +778,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
+	enum hl_queue_type q_type;
 	u64 *signal_seq_arr = NULL, signal_seq;
 	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
 	int rc;
@@ -770,9 +811,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	chunk = &cs_chunk_array[0];
 	q_idx = chunk->queue_index;
 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+	q_type = hw_queue_prop->type;
 
-	if ((q_idx >= HL_MAX_QUEUES) ||
-			(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
+	if ((q_idx >= hdev->asic_prop.max_queues) ||
+			(!hw_queue_prop->supports_sync_stream)) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
 		rc = -EINVAL;
 		goto free_cs_chunk_array;
@@ -869,25 +911,28 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	*cs_seq = cs->sequence;
 
-	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+	job = hl_cs_allocate_job(hdev, q_type, true);
 	if (!job) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		rc = -ENOMEM;
 		goto put_cs;
 	}
 
-	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	if (cs->type == CS_TYPE_WAIT)
+		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+	else
+		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
+
+	cb = hl_cb_kernel_create(hdev, cb_size,
+				q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
 	if (!cb) {
+		ctx->cs_counters.out_of_mem_drop_cnt++;
 		kfree(job);
 		rc = -EFAULT;
 		goto put_cs;
 	}
 
-	if (cs->type == CS_TYPE_WAIT)
-		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
-	else
-		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
-
 	job->id = 0;
 	job->cs = cs;
 	job->user_cb = cb;
@@ -1126,7 +1171,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 		rc = PTR_ERR(fence);
 		if (rc == -EINVAL)
 			dev_notice_ratelimited(hdev->dev,
-				"Can't wait on seq %llu because current CS is at seq %llu\n",
+				"Can't wait on CS %llu because current CS is at seq %llu\n",
 				seq, ctx->cs_sequence);
 	} else if (fence) {
 		rc = dma_fence_wait_timeout(fence, true, timeout);
@@ -1159,15 +1204,21 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	memset(args, 0, sizeof(*args));
 
 	if (rc < 0) {
-		dev_err_ratelimited(hdev->dev,
-				"Error %ld on waiting for CS handle %llu\n",
-				rc, seq);
 		if (rc == -ERESTARTSYS) {
+			dev_err_ratelimited(hdev->dev,
+				"user process got signal while waiting for CS handle %llu\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
 			rc = -EINTR;
 		} else if (rc == -ETIMEDOUT) {
+			dev_err_ratelimited(hdev->dev,
+				"CS %llu has timed-out while user process is waiting for it\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
 		} else if (rc == -EIO) {
+			dev_err_ratelimited(hdev->dev,
+				"CS %llu has been aborted while user process is waiting for it\n",
+				seq);
 			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
 		}
 		return rc;
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/common/context.c
index ec92b3506b1f..3e375958e73b 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	 * to this function unless the ref count is 0
 	 */
 
-	for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
 		dma_fence_put(ctx->cs_pending[i]);
 
+	kfree(ctx->cs_pending);
+
 	if (ctx->asid != HL_KERNEL_ASID_ID) {
 		/* The engines are stopped as there is no executing CS, but the
 		 * Coresight might be still working by accessing addresses
@@ -110,8 +112,7 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
 		return;
 
 	dev_warn(hdev->dev,
-		"Context %d closed or terminated but its CS are executing\n",
-		ctx->asid);
+		"user process released device but its command submissions are still executing\n");
 }
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -126,34 +127,49 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
+	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+				sizeof(struct dma_fence *),
+				GFP_KERNEL);
+	if (!ctx->cs_pending)
+		return -ENOMEM;
 
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
 		rc = hl_mmu_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
-			goto mem_ctx_err;
+			goto err_free_cs_pending;
 		}
 	} else {
 		ctx->asid = hl_asid_alloc(hdev);
 		if (!ctx->asid) {
 			dev_err(hdev->dev, "No free ASID, failed to create context\n");
-			return -ENOMEM;
+			rc = -ENOMEM;
+			goto err_free_cs_pending;
 		}
 
 		rc = hl_vm_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mem ctx module\n");
 			rc = -ENOMEM;
-			goto mem_ctx_err;
+			goto err_asid_free;
+		}
+
+		rc = hdev->asic_funcs->ctx_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev, "ctx_init failed\n");
+			goto err_vm_ctx_fini;
 		}
 	}
 
 	return 0;
 
-mem_ctx_err:
-	if (ctx->asid != HL_KERNEL_ASID_ID)
-		hl_asid_free(hdev, ctx->asid);
+err_vm_ctx_fini:
+	hl_vm_ctx_fini(ctx);
+err_asid_free:
+	hl_asid_free(hdev, ctx->asid);
+err_free_cs_pending:
+	kfree(ctx->cs_pending);
 
 	return rc;
 }
@@ -170,6 +186,7 @@ int hl_ctx_put(struct hl_ctx *ctx)
 
 struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 {
+	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->cs_lock);
@@ -179,13 +196,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
 		spin_unlock(&ctx->cs_lock);
 		return NULL;
 	}
 
 	fence = dma_fence_get(
-			ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+			ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
 	spin_unlock(&ctx->cs_lock);
 
 	return fence;
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index fc4372c18ce2..fc4372c18ce2 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/common/device.c
index 2b38a119704c..9919ff121067 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -249,7 +249,8 @@ static void device_cdev_sysfs_del(struct hl_device *hdev)
  */
 static int device_early_init(struct hl_device *hdev)
 {
-	int rc;
+	int i, rc;
+	char workq_name[32];
 
 	switch (hdev->asic_type) {
 	case ASIC_GOYA:
@@ -274,11 +275,24 @@ static int device_early_init(struct hl_device *hdev)
 	if (rc)
 		goto early_fini;
 
-	hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
-	if (hdev->cq_wq == NULL) {
-		dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
-		rc = -ENOMEM;
-		goto asid_fini;
+	if (hdev->asic_prop.completion_queues_count) {
+		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+				sizeof(*hdev->cq_wq),
+				GFP_ATOMIC);
+		if (!hdev->cq_wq) {
+			rc = -ENOMEM;
+			goto asid_fini;
+		}
+	}
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+		snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
+		if (hdev->cq_wq == NULL) {
+			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+			rc = -ENOMEM;
+			goto free_cq_wq;
+		}
 	}
 
 	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
@@ -321,7 +335,10 @@ free_chip_info:
 free_eq_wq:
 	destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
-	destroy_workqueue(hdev->cq_wq);
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		if (hdev->cq_wq[i])
+			destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 asid_fini:
 	hl_asid_fini(hdev);
 early_fini:
@@ -339,6 +356,8 @@ early_fini:
  */
 static void device_early_fini(struct hl_device *hdev)
 {
+	int i;
+
 	mutex_destroy(&hdev->mmu_cache_lock);
 	mutex_destroy(&hdev->debug_lock);
 	mutex_destroy(&hdev->send_cpu_message_lock);
@@ -351,7 +370,10 @@ static void device_early_fini(struct hl_device *hdev)
 	kfree(hdev->hl_chip_info);
 
 	destroy_workqueue(hdev->eq_wq);
-	destroy_workqueue(hdev->cq_wq);
+
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		destroy_workqueue(hdev->cq_wq[i]);
+	kfree(hdev->cq_wq);
 
 	hl_asid_fini(hdev);
 
@@ -838,6 +860,22 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 		if (rc)
 			return 0;
 
+		if (hard_reset) {
+			/* Disable PCI access from device F/W so he won't send
+			 * us additional interrupts. We disable MSI/MSI-X at
+			 * the halt_engines function and we can't have the F/W
+			 * sending us interrupts after that. We need to disable
+			 * the access here because if the device is marked
+			 * disable, the message won't be send. Also, in case
+			 * of heartbeat, the device CPU is marked as disable
+			 * so this message won't be sent
+			 */
+			if (hl_fw_send_pci_access_msg(hdev,
+					ARMCP_PACKET_DISABLE_PCI_ACCESS))
+				dev_warn(hdev->dev,
+					"Failed to disable PCI access by F/W\n");
+		}
+
 		/* This also blocks future CS/VM/JOB completion operations */
 		hdev->disabled = true;
 
@@ -995,6 +1033,12 @@ again:
 		}
 	}
 
+	/* Device is now enabled as part of the initialization requires
+	 * communication with the device firmware to get information that
+	 * is required for the initialization itself
+	 */
+	hdev->disabled = false;
+
 	rc = hdev->asic_funcs->hw_init(hdev);
 	if (rc) {
 		dev_err(hdev->dev,
@@ -1002,8 +1046,6 @@ again:
 		goto out_err;
 	}
 
-	hdev->disabled = false;
-
 	/* Check that the communication with the device is working */
 	rc = hdev->asic_funcs->test_queues(hdev);
 	if (rc) {
@@ -1144,14 +1186,17 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	 * because there the addresses of the completion queues are being
 	 * passed as arguments to request_irq
 	 */
-	hdev->completion_queue = kcalloc(cq_cnt,
-						sizeof(*hdev->completion_queue),
-						GFP_KERNEL);
+	if (cq_cnt) {
+		hdev->completion_queue = kcalloc(cq_cnt,
+				sizeof(*hdev->completion_queue),
+				GFP_KERNEL);
 
-	if (!hdev->completion_queue) {
-		dev_err(hdev->dev, "failed to allocate completion queues\n");
-		rc = -ENOMEM;
-		goto hw_queues_destroy;
+		if (!hdev->completion_queue) {
+			dev_err(hdev->dev,
+				"failed to allocate completion queues\n");
+			rc = -ENOMEM;
+			goto hw_queues_destroy;
+		}
 	}
 
 	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
@@ -1162,6 +1207,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 				"failed to initialize completion queue\n");
 			goto cq_fini;
 		}
+		hdev->completion_queue[i].cq_idx = i;
 	}
 
 	/*
@@ -1219,6 +1265,12 @@ int hl_device_init(struct hl_device *hd
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2020-07-24 20:22:25 +0200
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2020-07-24 20:22:25 +0200
commit	860e73b49cd933c708e3e1e1e07cdea81b6acd1c (patch)
tree	d79deee0e36096f56cadcb9abb0be9864bac9e7c
parent	54918b8ed1e52ae5f2a521bdf014e4fba797c920 (diff)
parent	94f8be9eb065412cf069efd45053d33e8911fa9e (diff)