summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2020-07-24 20:22:25 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2020-07-24 20:22:25 +0200
commit860e73b49cd933c708e3e1e1e07cdea81b6acd1c (patch)
treed79deee0e36096f56cadcb9abb0be9864bac9e7c
parent54918b8ed1e52ae5f2a521bdf014e4fba797c920 (diff)
parent94f8be9eb065412cf069efd45053d33e8911fa9e (diff)
Merge tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes:

This tag contains the following changes for kernel 5.9-rc1:

- Remove rate limiters from GAUDI configuration (no longer needed).
- Set maximum amount of in-flight CS per ASIC type and increase the maximum amount for GAUDI.
- Refactor signal/wait command submissions code
- Calculate trace frequency from PLLs to show accurate profiling data
- Rephrase error messages to make them more clear to the common user
- Add statistics of dropped CS (counter per possible reason for drop)
- Get ECC information from firmware
- Remove support for partial SoC reset in Gaudi
- Halt device CPU only when reset is certain to happen. Sometimes we abort the reset procedure and in that case we can't leave device CPU in halt mode.
- Set each CQ to its own work queue to prevent a race between completions on different CQs.
- Use queue pi/ci in order to determine queue occupancy. This is done to make the code reusable between current and future ASICs.
- Add more validations for user inputs.
- Refactor PCIe controller configuration to make the code reusable between current and future ASICs.
- Update firmware interface headers to latest version
- Move all common code to a dedicated common sub-folder

* tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux: (28 commits)
  habanalabs: Fix memory leak in error flow of context initialization
  habanalabs: use no flags on MMU cache invalidation
  habanalabs: enable device before hw_init()
  habanalabs: create internal CB pool
  habanalabs: update hl_boot_if.h from firmware
  habanalabs: create common folder
  habanalabs: check for DMA errors when clearing memory
  habanalabs: verify queue can contain all cs jobs
  habanalabs: Assign each CQ with its own work queue
  habanalabs: halt device CPU only upon certain reset
  habanalabs: remove unused hash
  habanalabs: use queue pi/ci in order to determine queue occupancy
  habanalabs: configure maximum queues per asic
  habanalabs: remove soft-reset support from GAUDI
  habanalabs: PCIe iATU refactoring
  habanalabs: Extract ECC information from FW
  habanalabs: Add dropped cs statistics info struct
  habanalabs: extract cpu boot status lookup
  habanalabs: rephrase error messages
  habanalabs: Increase queues depth
  ...
-rw-r--r--drivers/misc/habanalabs/Makefile11
-rw-r--r--drivers/misc/habanalabs/common/Makefile9
-rw-r--r--drivers/misc/habanalabs/common/asid.c (renamed from drivers/misc/habanalabs/asid.c)0
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c (renamed from drivers/misc/habanalabs/command_buffer.c)82
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c (renamed from drivers/misc/habanalabs/command_submission.c)97
-rw-r--r--drivers/misc/habanalabs/common/context.c (renamed from drivers/misc/habanalabs/context.c)39
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c (renamed from drivers/misc/habanalabs/debugfs.c)0
-rw-r--r--drivers/misc/habanalabs/common/device.c (renamed from drivers/misc/habanalabs/device.c)88
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c (renamed from drivers/misc/habanalabs/firmware_if.c)101
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h (renamed from drivers/misc/habanalabs/habanalabs.h)172
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c (renamed from drivers/misc/habanalabs/habanalabs_drv.c)1
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c (renamed from drivers/misc/habanalabs/habanalabs_ioctl.c)24
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c (renamed from drivers/misc/habanalabs/hw_queue.c)165
-rw-r--r--drivers/misc/habanalabs/common/hwmon.c (renamed from drivers/misc/habanalabs/hwmon.c)0
-rw-r--r--drivers/misc/habanalabs/common/irq.c (renamed from drivers/misc/habanalabs/irq.c)13
-rw-r--r--drivers/misc/habanalabs/common/memory.c (renamed from drivers/misc/habanalabs/memory.c)3
-rw-r--r--drivers/misc/habanalabs/common/mmu.c (renamed from drivers/misc/habanalabs/mmu.c)1
-rw-r--r--drivers/misc/habanalabs/common/pci.c (renamed from drivers/misc/habanalabs/pci.c)136
-rw-r--r--drivers/misc/habanalabs/common/sysfs.c (renamed from drivers/misc/habanalabs/sysfs.c)3
-rw-r--r--drivers/misc/habanalabs/gaudi/Makefile2
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi.c894
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudiP.h16
-rw-r--r--drivers/misc/habanalabs/gaudi/gaudi_coresight.c6
-rw-r--r--drivers/misc/habanalabs/goya/goya.c172
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h14
-rw-r--r--drivers/misc/habanalabs/goya/goya_coresight.c6
-rw-r--r--drivers/misc/habanalabs/include/common/armcp_if.h (renamed from drivers/misc/habanalabs/include/armcp_if.h)14
-rw-r--r--drivers/misc/habanalabs/include/common/hl_boot_if.h (renamed from drivers/misc/habanalabs/include/hl_boot_if.h)14
-rw-r--r--drivers/misc/habanalabs/include/common/qman_if.h (renamed from drivers/misc/habanalabs/include/qman_if.h)0
-rw-r--r--drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h21
-rw-r--r--drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_cpu_pll_regs.h114
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi_masks.h3
-rw-r--r--drivers/misc/habanalabs/include/gaudi/gaudi_packets.h4
-rw-r--r--include/uapi/misc/habanalabs.h27
34 files changed, 1260 insertions, 992 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 421ebd903069..a786c0a7de9a 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -3,16 +3,15 @@
# Makefile for HabanaLabs AI accelerators driver
#
-obj-m := habanalabs.o
+obj-$(CONFIG_HABANA_AI) := habanalabs.o
-habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
- command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
- command_submission.o mmu.o firmware_if.o pci.o
-
-habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
+include $(src)/common/Makefile
+habanalabs-y += $(HL_COMMON_FILES)
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)
+
+habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile
new file mode 100644
index 000000000000..97d03b5c8683
--- /dev/null
+++ b/drivers/misc/habanalabs/common/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/common
+
+HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
+ common/asid.o common/habanalabs_ioctl.o \
+ common/command_buffer.o common/hw_queue.o common/irq.o \
+ common/sysfs.o common/hwmon.o common/memory.o \
+ common/command_submission.o common/mmu.o common/firmware_if.o \
+ common/pci.o
diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/common/asid.c
index a2fdf31cf27c..a2fdf31cf27c 100644
--- a/drivers/misc/habanalabs/asid.c
+++ b/drivers/misc/habanalabs/common/asid.c
diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 02d13f71b1df..7c38c4f7f9c0 100644
--- a/drivers/misc/habanalabs/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -10,12 +10,18 @@
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/genalloc.h>
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
- hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
- (void *) (uintptr_t) cb->kernel_address,
- cb->bus_address);
+ if (cb->is_internal)
+ gen_pool_free(hdev->internal_cb_pool,
+ cb->kernel_address, cb->size);
+ else
+ hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
+ (void *) (uintptr_t) cb->kernel_address,
+ cb->bus_address);
+
kfree(cb);
}
@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
}
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
- int ctx_id)
+ int ctx_id, bool internal_cb)
{
struct hl_cb *cb;
+ u32 cb_offset;
void *p;
/*
@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
if (!cb)
return NULL;
- if (ctx_id == HL_KERNEL_ASID_ID)
+ if (internal_cb) {
+ p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
+ if (!p) {
+ kfree(cb);
+ return NULL;
+ }
+
+ cb_offset = p - hdev->internal_cb_pool_virt_addr;
+ cb->is_internal = true;
+ cb->bus_address = hdev->internal_cb_va_base + cb_offset;
+ } else if (ctx_id == HL_KERNEL_ASID_ID) {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address, GFP_ATOMIC);
- else
+ } else {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address,
GFP_USER | __GFP_ZERO);
+ }
+
if (!p) {
dev_err(hdev->dev,
"failed to allocate %d of dma memory for CB\n",
@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
}
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u32 cb_size, u64 *handle, int ctx_id)
+ u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
{
struct hl_cb *cb;
bool alloc_new_cb = true;
@@ -112,28 +131,30 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
goto out_err;
}
- /* Minimum allocation must be PAGE SIZE */
- if (cb_size < PAGE_SIZE)
- cb_size = PAGE_SIZE;
-
- if (ctx_id == HL_KERNEL_ASID_ID &&
- cb_size <= hdev->asic_prop.cb_pool_cb_size) {
-
- spin_lock(&hdev->cb_pool_lock);
- if (!list_empty(&hdev->cb_pool)) {
- cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
- pool_list);
- list_del(&cb->pool_list);
- spin_unlock(&hdev->cb_pool_lock);
- alloc_new_cb = false;
- } else {
- spin_unlock(&hdev->cb_pool_lock);
- dev_dbg(hdev->dev, "CB pool is empty\n");
+ if (!internal_cb) {
+ /* Minimum allocation must be PAGE SIZE */
+ if (cb_size < PAGE_SIZE)
+ cb_size = PAGE_SIZE;
+
+ if (ctx_id == HL_KERNEL_ASID_ID &&
+ cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+
+ spin_lock(&hdev->cb_pool_lock);
+ if (!list_empty(&hdev->cb_pool)) {
+ cb = list_first_entry(&hdev->cb_pool,
+ typeof(*cb), pool_list);
+ list_del(&cb->pool_list);
+ spin_unlock(&hdev->cb_pool_lock);
+ alloc_new_cb = false;
+ } else {
+ spin_unlock(&hdev->cb_pool_lock);
+ dev_dbg(hdev->dev, "CB pool is empty\n");
+ }
}
}
if (alloc_new_cb) {
- cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+ cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
if (!cb) {
rc = -ENOMEM;
goto out_err;
@@ -229,8 +250,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
rc = -EINVAL;
} else {
rc = hl_cb_create(hdev, &hpriv->cb_mgr,
- args->in.cb_size, &handle,
- hpriv->ctx->asid);
+ args->in.cb_size, &handle,
+ hpriv->ctx->asid, false);
}
memset(args, 0, sizeof(*args));
@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
idr_destroy(&mgr->cb_handles);
}
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+ bool internal_cb)
{
u64 cb_handle;
struct hl_cb *cb;
int rc;
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
- HL_KERNEL_ASID_ID);
+ HL_KERNEL_ASID_ID, internal_cb);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
- HL_KERNEL_ASID_ID);
+ HL_KERNEL_ASID_ID, false);
if (cb) {
cb->is_pool = true;
list_add(&cb->pool_list, &hdev->cb_pool);
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b0f62cbbdc87..e096532c0e48 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -246,6 +246,18 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
kfree(job);
}
+static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+ hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
+ ctx->cs_counters.device_in_reset_drop_cnt;
+ hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
+ ctx->cs_counters.out_of_mem_drop_cnt;
+ hdev->aggregated_cs_counters.parsing_drop_cnt +=
+ ctx->cs_counters.parsing_drop_cnt;
+ hdev->aggregated_cs_counters.queue_full_drop_cnt +=
+ ctx->cs_counters.queue_full_drop_cnt;
+}
+
static void cs_do_release(struct kref *ref)
{
struct hl_cs *cs = container_of(ref, struct hl_cs,
@@ -349,6 +361,9 @@ static void cs_do_release(struct kref *ref)
dma_fence_signal(cs->fence);
dma_fence_put(cs->fence);
+ cs_counters_aggregate(hdev, cs->ctx);
+
+ kfree(cs->jobs_in_queue_cnt);
kfree(cs);
}
@@ -373,9 +388,9 @@ static void cs_timedout(struct work_struct *work)
hdev = cs->ctx->hdev;
ctx_asid = cs->ctx->asid;
- /* TODO: add information about last signaled seq and last emitted seq */
- dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
- ctx_asid, cs->sequence);
+ dev_err(hdev->dev,
+ "Command submission %llu has not finished in time!\n",
+ cs->sequence);
cs_put(cs);
@@ -418,21 +433,29 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
spin_lock(&ctx->cs_lock);
cs_cmpl->cs_seq = ctx->cs_sequence;
- other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
+ other = ctx->cs_pending[cs_cmpl->cs_seq &
+ (hdev->asic_prop.max_pending_cs - 1)];
if ((other) && (!dma_fence_is_signaled(other))) {
- spin_unlock(&ctx->cs_lock);
dev_dbg(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
rc = -EAGAIN;
goto free_fence;
}
+ cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+ sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+ if (!cs->jobs_in_queue_cnt) {
+ rc = -ENOMEM;
+ goto free_fence;
+ }
+
dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
ctx->asid, ctx->cs_sequence);
cs->sequence = cs_cmpl->cs_seq;
- ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
+ ctx->cs_pending[cs_cmpl->cs_seq &
+ (hdev->asic_prop.max_pending_cs - 1)] =
&cs_cmpl->base_fence;
ctx->cs_sequence++;
@@ -447,6 +470,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
return 0;
free_fence:
+ spin_unlock(&ctx->cs_lock);
kfree(cs_cmpl);
free_cs:
kfree(cs);
@@ -463,10 +487,12 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
void hl_cs_rollback_all(struct hl_device *hdev)
{
+ int i;
struct hl_cs *cs, *tmp;
/* flush all completions */
- flush_workqueue(hdev->cq_wq);
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ flush_workqueue(hdev->cq_wq[i]);
/* Make sure we don't have leftovers in the H/W queues mirror list */
list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
@@ -499,10 +525,18 @@ static int validate_queue_index(struct hl_device *hdev,
struct asic_fixed_properties *asic = &hdev->asic_prop;
struct hw_queue_properties *hw_queue_prop;
+ /* This must be checked here to prevent out-of-bounds access to
+ * hw_queues_props array
+ */
+ if (chunk->queue_index >= asic->max_queues) {
+ dev_err(hdev->dev, "Queue index %d is invalid\n",
+ chunk->queue_index);
+ return -EINVAL;
+ }
+
hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
- if ((chunk->queue_index >= HL_MAX_QUEUES) ||
- (hw_queue_prop->type == QUEUE_TYPE_NA)) {
+ if (hw_queue_prop->type == QUEUE_TYPE_NA) {
dev_err(hdev->dev, "Queue index %d is invalid\n",
chunk->queue_index);
return -EINVAL;
@@ -630,12 +664,15 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = validate_queue_index(hdev, chunk, &queue_type,
&is_kernel_allocated_cb);
- if (rc)
+ if (rc) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
goto free_cs_object;
+ }
if (is_kernel_allocated_cb) {
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
rc = -EINVAL;
goto free_cs_object;
}
@@ -649,6 +686,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb);
if (!job) {
+ hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
if (is_kernel_allocated_cb)
@@ -681,6 +719,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = cs_parser(hpriv, job);
if (rc) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
dev_err(hdev->dev,
"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
cs->ctx->asid, cs->sequence, job->id, rc);
@@ -689,6 +728,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
}
if (int_queues_only) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
dev_err(hdev->dev,
"Reject CS %d.%llu because only internal queues jobs are present\n",
cs->ctx->asid, cs->sequence);
@@ -738,6 +778,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
+ enum hl_queue_type q_type;
u64 *signal_seq_arr = NULL, signal_seq;
u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
int rc;
@@ -770,9 +811,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
chunk = &cs_chunk_array[0];
q_idx = chunk->queue_index;
hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+ q_type = hw_queue_prop->type;
- if ((q_idx >= HL_MAX_QUEUES) ||
- (hw_queue_prop->type != QUEUE_TYPE_EXT)) {
+ if ((q_idx >= hdev->asic_prop.max_queues) ||
+ (!hw_queue_prop->supports_sync_stream)) {
dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
rc = -EINVAL;
goto free_cs_chunk_array;
@@ -869,25 +911,28 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
*cs_seq = cs->sequence;
- job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+ job = hl_cs_allocate_job(hdev, q_type, true);
if (!job) {
+ ctx->cs_counters.out_of_mem_drop_cnt++;
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
goto put_cs;
}
- cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ if (cs->type == CS_TYPE_WAIT)
+ cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+ else
+ cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
+
+ cb = hl_cb_kernel_create(hdev, cb_size,
+ q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
if (!cb) {
+ ctx->cs_counters.out_of_mem_drop_cnt++;
kfree(job);
rc = -EFAULT;
goto put_cs;
}
- if (cs->type == CS_TYPE_WAIT)
- cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
- else
- cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
-
job->id = 0;
job->cs = cs;
job->user_cb = cb;
@@ -1126,7 +1171,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
rc = PTR_ERR(fence);
if (rc == -EINVAL)
dev_notice_ratelimited(hdev->dev,
- "Can't wait on seq %llu because current CS is at seq %llu\n",
+ "Can't wait on CS %llu because current CS is at seq %llu\n",
seq, ctx->cs_sequence);
} else if (fence) {
rc = dma_fence_wait_timeout(fence, true, timeout);
@@ -1159,15 +1204,21 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
memset(args, 0, sizeof(*args));
if (rc < 0) {
- dev_err_ratelimited(hdev->dev,
- "Error %ld on waiting for CS handle %llu\n",
- rc, seq);
if (rc == -ERESTARTSYS) {
+ dev_err_ratelimited(hdev->dev,
+ "user process got signal while waiting for CS handle %llu\n",
+ seq);
args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
rc = -EINTR;
} else if (rc == -ETIMEDOUT) {
+ dev_err_ratelimited(hdev->dev,
+ "CS %llu has timed-out while user process is waiting for it\n",
+ seq);
args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
} else if (rc == -EIO) {
+ dev_err_ratelimited(hdev->dev,
+ "CS %llu has been aborted while user process is waiting for it\n",
+ seq);
args->out.status = HL_WAIT_CS_STATUS_ABORTED;
}
return rc;
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/common/context.c
index ec92b3506b1f..3e375958e73b 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
* to this function unless the ref count is 0
*/
- for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+ for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
dma_fence_put(ctx->cs_pending[i]);
+ kfree(ctx->cs_pending);
+
if (ctx->asid != HL_KERNEL_ASID_ID) {
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
@@ -110,8 +112,7 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
return;
dev_warn(hdev->dev,
- "Context %d closed or terminated but its CS are executing\n",
- ctx->asid);
+ "user process released device but its command submissions are still executing\n");
}
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -126,34 +127,49 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
spin_lock_init(&ctx->cs_lock);
atomic_set(&ctx->thread_ctx_switch_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
+ ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+ sizeof(struct dma_fence *),
+ GFP_KERNEL);
+ if (!ctx->cs_pending)
+ return -ENOMEM;
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mmu ctx module\n");
- goto mem_ctx_err;
+ goto err_free_cs_pending;
}
} else {
ctx->asid = hl_asid_alloc(hdev);
if (!ctx->asid) {
dev_err(hdev->dev, "No free ASID, failed to create context\n");
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto err_free_cs_pending;
}
rc = hl_vm_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mem ctx module\n");
rc = -ENOMEM;
- goto mem_ctx_err;
+ goto err_asid_free;
+ }
+
+ rc = hdev->asic_funcs->ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "ctx_init failed\n");
+ goto err_vm_ctx_fini;
}
}
return 0;
-mem_ctx_err:
- if (ctx->asid != HL_KERNEL_ASID_ID)
- hl_asid_free(hdev, ctx->asid);
+err_vm_ctx_fini:
+ hl_vm_ctx_fini(ctx);
+err_asid_free:
+ hl_asid_free(hdev, ctx->asid);
+err_free_cs_pending:
+ kfree(ctx->cs_pending);
return rc;
}
@@ -170,6 +186,7 @@ int hl_ctx_put(struct hl_ctx *ctx)
struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
+ struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
struct dma_fence *fence;
spin_lock(&ctx->cs_lock);
@@ -179,13 +196,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
return ERR_PTR(-EINVAL);
}
- if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+ if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
spin_unlock(&ctx->cs_lock);
return NULL;
}
fence = dma_fence_get(
- ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+ ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
spin_unlock(&ctx->cs_lock);
return fence;
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index fc4372c18ce2..fc4372c18ce2 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/common/device.c
index 2b38a119704c..9919ff121067 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -249,7 +249,8 @@ static void device_cdev_sysfs_del(struct hl_device *hdev)
*/
static int device_early_init(struct hl_device *hdev)
{
- int rc;
+ int i, rc;
+ char workq_name[32];
switch (hdev->asic_type) {
case ASIC_GOYA:
@@ -274,11 +275,24 @@ static int device_early_init(struct hl_device *hdev)
if (rc)
goto early_fini;
- hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
- if (hdev->cq_wq == NULL) {
- dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
- rc = -ENOMEM;
- goto asid_fini;
+ if (hdev->asic_prop.completion_queues_count) {
+ hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+ sizeof(*hdev->cq_wq),
+ GFP_ATOMIC);
+ if (!hdev->cq_wq) {
+ rc = -ENOMEM;
+ goto asid_fini;
+ }
+ }
+
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+ snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+ hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
+ if (hdev->cq_wq == NULL) {
+ dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+ rc = -ENOMEM;
+ goto free_cq_wq;
+ }
}
hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
@@ -321,7 +335,10 @@ free_chip_info:
free_eq_wq:
destroy_workqueue(hdev->eq_wq);
free_cq_wq:
- destroy_workqueue(hdev->cq_wq);
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ if (hdev->cq_wq[i])
+ destroy_workqueue(hdev->cq_wq[i]);
+ kfree(hdev->cq_wq);
asid_fini:
hl_asid_fini(hdev);
early_fini:
@@ -339,6 +356,8 @@ early_fini:
*/
static void device_early_fini(struct hl_device *hdev)
{
+ int i;
+
mutex_destroy(&hdev->mmu_cache_lock);
mutex_destroy(&hdev->debug_lock);
mutex_destroy(&hdev->send_cpu_message_lock);
@@ -351,7 +370,10 @@ static void device_early_fini(struct hl_device *hdev)
kfree(hdev->hl_chip_info);
destroy_workqueue(hdev->eq_wq);
- destroy_workqueue(hdev->cq_wq);
+
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ destroy_workqueue(hdev->cq_wq[i]);
+ kfree(hdev->cq_wq);
hl_asid_fini(hdev);
@@ -838,6 +860,22 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
if (rc)
return 0;
+ if (hard_reset) {
+ /* Disable PCI access from device F/W so it won't send
+ * us additional interrupts. We disable MSI/MSI-X at
+ * the halt_engines function and we can't have the F/W
+ * sending us interrupts after that. We need to disable
+ * the access here because if the device is marked
+ * disabled, the message won't be sent. Also, in case
+ * of heartbeat, the device CPU is marked as disabled
+ * so this message won't be sent
+ */
+ if (hl_fw_send_pci_access_msg(hdev,
+ ARMCP_PACKET_DISABLE_PCI_ACCESS))
+ dev_warn(hdev->dev,
+ "Failed to disable PCI access by F/W\n");
+ }
+
/* This also blocks future CS/VM/JOB completion operations */
hdev->disabled = true;
@@ -995,6 +1033,12 @@ again:
}
}
+ /* Device is now enabled because part of the initialization requires
+ * communication with the device firmware to get information that
+ * is required for the initialization itself
+ */
+ hdev->disabled = false;
+
rc = hdev->asic_funcs->hw_init(hdev);
if (rc) {
dev_err(hdev->dev,
@@ -1002,8 +1046,6 @@ again:
goto out_err;
}
- hdev->disabled = false;
-
/* Check that the communication with the device is working */
rc = hdev->asic_funcs->test_queues(hdev);
if (rc) {
@@ -1144,14 +1186,17 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
* because there the addresses of the completion queues are being
* passed as arguments to request_irq
*/
- hdev->completion_queue = kcalloc(cq_cnt,
- sizeof(*hdev->completion_queue),
- GFP_KERNEL);
+ if (cq_cnt) {
+ hdev->completion_queue = kcalloc(cq_cnt,
+ sizeof(*hdev->completion_queue),
+ GFP_KERNEL);
- if (!hdev->completion_queue) {
- dev_err(hdev->dev, "failed to allocate completion queues\n");
- rc = -ENOMEM;
- goto hw_queues_destroy;
+ if (!hdev->completion_queue) {
+ dev_err(hdev->dev,
+ "failed to allocate completion queues\n");
+ rc = -ENOMEM;
+ goto hw_queues_destroy;
+ }
}
for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
@@ -1162,6 +1207,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
"failed to initialize completion queue\n");
goto cq_fini;
}
+ hdev->completion_queue[i].cq_idx = i;
}
/*
@@ -1219,6 +1265,12 @@ int hl_device_init(struct hl_device *hd