diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-07-24 20:22:25 +0200 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2020-07-24 20:22:25 +0200 |
commit | 860e73b49cd933c708e3e1e1e07cdea81b6acd1c (patch) | |
tree | d79deee0e36096f56cadcb9abb0be9864bac9e7c /drivers | |
parent | 54918b8ed1e52ae5f2a521bdf014e4fba797c920 (diff) | |
parent | 94f8be9eb065412cf069efd45053d33e8911fa9e (diff) |
Merge tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes:
This tag contains the following changes for kernel 5.9-rc1:
- Remove rate limiters from GAUDI configuration (no longer needed).
- Set maximum amount of in-flight CS per ASIC type and increase
the maximum amount for GAUDI.
- Refactor signal/wait command submissions code
- Calculate trace frequency from PLLs to show accurate profiling data
- Rephrase error messages to make them more clear to the common user
- Add statistics of dropped CS (counter per possible reason for drop)
- Get ECC information from firmware
- Remove support for partial SoC reset in Gaudi
- Halt device CPU only when reset is certain to happen. Sometimes we abort
the reset procedure and in that case we can't leave device CPU in halt
mode.
- set each CQ to its own work queue to prevent a race between completions
on different CQs.
- Use queue pi/ci in order to determine queue occupancy. This is done to
make the code reusable between current and future ASICs.
- Add more validations for user inputs.
- Refactor PCIe controller configuration to make the code reusable between
current and future ASICs.
- Update firmware interface headers to latest version
- Move all common code to a dedicated common sub-folder
* tag 'misc-habanalabs-next-2020-07-24' of git://people.freedesktop.org/~gabbayo/linux: (28 commits)
habanalabs: Fix memory leak in error flow of context initialization
habanalabs: use no flags on MMU cache invalidation
habanalabs: enable device before hw_init()
habanalabs: create internal CB pool
habanalabs: update hl_boot_if.h from firmware
habanalabs: create common folder
habanalabs: check for DMA errors when clearing memory
habanalabs: verify queue can contain all cs jobs
habanalabs: Assign each CQ with its own work queue
habanalabs: halt device CPU only upon certain reset
habanalabs: remove unused hash
habanalabs: use queue pi/ci in order to determine queue occupancy
habanalabs: configure maximum queues per asic
habanalabs: remove soft-reset support from GAUDI
habanalabs: PCIe iATU refactoring
habanalabs: Extract ECC information from FW
habanalabs: Add dropped cs statistics info struct
habanalabs: extract cpu boot status lookup
habanalabs: rephrase error messages
habanalabs: Increase queues depth
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/misc/habanalabs/Makefile | 11 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/Makefile | 9 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/asid.c (renamed from drivers/misc/habanalabs/asid.c) | 0 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/command_buffer.c (renamed from drivers/misc/habanalabs/command_buffer.c) | 82 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/command_submission.c (renamed from drivers/misc/habanalabs/command_submission.c) | 97 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/context.c (renamed from drivers/misc/habanalabs/context.c) | 39 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/debugfs.c (renamed from drivers/misc/habanalabs/debugfs.c) | 0 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/device.c (renamed from drivers/misc/habanalabs/device.c) | 88 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/firmware_if.c (renamed from drivers/misc/habanalabs/firmware_if.c) | 101 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h (renamed from drivers/misc/habanalabs/habanalabs.h) | 172 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_drv.c (renamed from drivers/misc/habanalabs/habanalabs_drv.c) | 1 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs_ioctl.c (renamed from drivers/misc/habanalabs/habanalabs_ioctl.c) | 24 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/hw_queue.c (renamed from drivers/misc/habanalabs/hw_queue.c) | 165 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/hwmon.c (renamed from drivers/misc/habanalabs/hwmon.c) | 0 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/irq.c (renamed from drivers/misc/habanalabs/irq.c) | 13 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/memory.c (renamed from drivers/misc/habanalabs/memory.c) | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/mmu.c (renamed from drivers/misc/habanalabs/mmu.c) | 1 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/pci.c (renamed from drivers/misc/habanalabs/pci.c) | 136 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/sysfs.c (renamed from drivers/misc/habanalabs/sysfs.c) | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/Makefile | 2 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 894 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudiP.h | 16 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi_coresight.c | 6 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 172 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goyaP.h | 14 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya_coresight.c | 6 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/common/armcp_if.h (renamed from drivers/misc/habanalabs/include/armcp_if.h) | 14 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/common/hl_boot_if.h (renamed from drivers/misc/habanalabs/include/hl_boot_if.h) | 14 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/common/qman_if.h (renamed from drivers/misc/habanalabs/include/qman_if.h) | 0 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h | 21 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_cpu_pll_regs.h | 114 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/gaudi/gaudi_masks.h | 3 | ||||
-rw-r--r-- | drivers/misc/habanalabs/include/gaudi/gaudi_packets.h | 4 |
33 files changed, 1236 insertions, 989 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index 421ebd903069..a786c0a7de9a 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -3,16 +3,15 @@ # Makefile for HabanaLabs AI accelerators driver # -obj-m := habanalabs.o +obj-$(CONFIG_HABANA_AI) := habanalabs.o -habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \ - command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \ - command_submission.o mmu.o firmware_if.o pci.o - -habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o +include $(src)/common/Makefile +habanalabs-y += $(HL_COMMON_FILES) include $(src)/goya/Makefile habanalabs-y += $(HL_GOYA_FILES) include $(src)/gaudi/Makefile habanalabs-y += $(HL_GAUDI_FILES) + +habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o diff --git a/drivers/misc/habanalabs/common/Makefile b/drivers/misc/habanalabs/common/Makefile new file mode 100644 index 000000000000..97d03b5c8683 --- /dev/null +++ b/drivers/misc/habanalabs/common/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/common + +HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \ + common/asid.o common/habanalabs_ioctl.o \ + common/command_buffer.o common/hw_queue.o common/irq.o \ + common/sysfs.o common/hwmon.o common/memory.o \ + common/command_submission.o common/mmu.o common/firmware_if.o \ + common/pci.o diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/common/asid.c index a2fdf31cf27c..a2fdf31cf27c 100644 --- a/drivers/misc/habanalabs/asid.c +++ b/drivers/misc/habanalabs/common/asid.c diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 02d13f71b1df..7c38c4f7f9c0 100644 --- a/drivers/misc/habanalabs/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -10,12 +10,18 @@ #include <linux/mm.h> #include <linux/slab.h> +#include <linux/genalloc.h> static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) { - hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, - (void *) (uintptr_t) cb->kernel_address, - cb->bus_address); + if (cb->is_internal) + gen_pool_free(hdev->internal_cb_pool, + cb->kernel_address, cb->size); + else + hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, + (void *) (uintptr_t) cb->kernel_address, + cb->bus_address); + kfree(cb); } @@ -44,9 +50,10 @@ static void cb_release(struct kref *ref) } static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, - int ctx_id) + int ctx_id, bool internal_cb) { struct hl_cb *cb; + u32 cb_offset; void *p; /* @@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, if (!cb) return NULL; - if (ctx_id == HL_KERNEL_ASID_ID) + if (internal_cb) { + p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size); + if (!p) { + kfree(cb); + return NULL; + } + + cb_offset = p - hdev->internal_cb_pool_virt_addr; + cb->is_internal = true; + cb->bus_address = hdev->internal_cb_va_base + cb_offset; + } else if (ctx_id == HL_KERNEL_ASID_ID) { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC); - else + } else { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_USER | __GFP_ZERO); + } + if (!p) { dev_err(hdev->dev, "failed to allocate %d of dma memory for CB\n", @@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, } int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 cb_size, u64 *handle, int ctx_id) + u32 cb_size, u64 *handle, int ctx_id, bool internal_cb) { struct hl_cb *cb; bool alloc_new_cb = true; @@ -112,28 +131,30 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, goto out_err; } - /* Minimum allocation must be PAGE SIZE */ - if (cb_size < PAGE_SIZE) - cb_size = PAGE_SIZE; - - if (ctx_id == HL_KERNEL_ASID_ID && - cb_size <= hdev->asic_prop.cb_pool_cb_size) { - - spin_lock(&hdev->cb_pool_lock); - if (!list_empty(&hdev->cb_pool)) { - cb = list_first_entry(&hdev->cb_pool, typeof(*cb), - pool_list); - list_del(&cb->pool_list); - spin_unlock(&hdev->cb_pool_lock); - alloc_new_cb = false; - } else { - spin_unlock(&hdev->cb_pool_lock); - dev_dbg(hdev->dev, "CB pool is empty\n"); + if (!internal_cb) { + /* Minimum allocation must be PAGE SIZE */ + if (cb_size < PAGE_SIZE) + cb_size = PAGE_SIZE; + + if (ctx_id == HL_KERNEL_ASID_ID && + cb_size <= hdev->asic_prop.cb_pool_cb_size) { + + spin_lock(&hdev->cb_pool_lock); + if (!list_empty(&hdev->cb_pool)) { + cb = list_first_entry(&hdev->cb_pool, + typeof(*cb), pool_list); + list_del(&cb->pool_list); + spin_unlock(&hdev->cb_pool_lock); + alloc_new_cb = false; + } else { + spin_unlock(&hdev->cb_pool_lock); + dev_dbg(hdev->dev, "CB pool is empty\n"); + } } } if (alloc_new_cb) { - cb = hl_cb_alloc(hdev, cb_size, ctx_id); + cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb); if (!cb) { rc = -ENOMEM; goto out_err; @@ -229,8 +250,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) rc = -EINVAL; } else { rc = hl_cb_create(hdev, &hpriv->cb_mgr, - args->in.cb_size, &handle, - hpriv->ctx->asid); + args->in.cb_size, &handle, + hpriv->ctx->asid, false); } memset(args, 0, sizeof(*args)); @@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr) idr_destroy(&mgr->cb_handles); } -struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size) +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, + bool internal_cb) { u64 cb_handle; struct hl_cb *cb; int rc; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle, - HL_KERNEL_ASID_ID); + HL_KERNEL_ASID_ID, internal_cb); if (rc) { dev_err(hdev->dev, "Failed to allocate CB for the kernel driver %d\n", rc); @@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) { cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size, - HL_KERNEL_ASID_ID); + HL_KERNEL_ASID_ID, false); if (cb) { cb->is_pool = true; list_add(&cb->pool_list, &hdev->cb_pool); diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index b0f62cbbdc87..e096532c0e48 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -246,6 +246,18 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) kfree(job); } +static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx) +{ + hdev->aggregated_cs_counters.device_in_reset_drop_cnt += + ctx->cs_counters.device_in_reset_drop_cnt; + hdev->aggregated_cs_counters.out_of_mem_drop_cnt += + ctx->cs_counters.out_of_mem_drop_cnt; + hdev->aggregated_cs_counters.parsing_drop_cnt += + ctx->cs_counters.parsing_drop_cnt; + hdev->aggregated_cs_counters.queue_full_drop_cnt += + ctx->cs_counters.queue_full_drop_cnt; +} + static void cs_do_release(struct kref *ref) { struct hl_cs *cs = container_of(ref, struct hl_cs, @@ -349,6 +361,9 @@ static void cs_do_release(struct kref *ref) dma_fence_signal(cs->fence); dma_fence_put(cs->fence); + cs_counters_aggregate(hdev, cs->ctx); + + kfree(cs->jobs_in_queue_cnt); kfree(cs); } @@ -373,9 +388,9 @@ static void cs_timedout(struct work_struct *work) hdev = cs->ctx->hdev; ctx_asid = cs->ctx->asid; - /* TODO: add information about last signaled seq and last emitted seq */ - dev_err(hdev->dev, "User %d command submission %llu got stuck!\n", - ctx_asid, cs->sequence); + dev_err(hdev->dev, + "Command submission %llu has not finished in time!\n", + cs->sequence); cs_put(cs); @@ -418,21 +433,29 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, spin_lock(&ctx->cs_lock); cs_cmpl->cs_seq = ctx->cs_sequence; - other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)]; + other = ctx->cs_pending[cs_cmpl->cs_seq & + (hdev->asic_prop.max_pending_cs - 1)]; if ((other) && (!dma_fence_is_signaled(other))) { - spin_unlock(&ctx->cs_lock); dev_dbg(hdev->dev, "Rejecting CS because of too many in-flights CS\n"); rc = -EAGAIN; goto free_fence; } + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) { + rc = -ENOMEM; + goto free_fence; + } + dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock, ctx->asid, ctx->cs_sequence); cs->sequence = cs_cmpl->cs_seq; - ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] = + ctx->cs_pending[cs_cmpl->cs_seq & + (hdev->asic_prop.max_pending_cs - 1)] = &cs_cmpl->base_fence; ctx->cs_sequence++; @@ -447,6 +470,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, return 0; free_fence: + spin_unlock(&ctx->cs_lock); kfree(cs_cmpl); free_cs: kfree(cs); @@ -463,10 +487,12 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) void hl_cs_rollback_all(struct hl_device *hdev) { + int i; struct hl_cs *cs, *tmp; /* flush all completions */ - flush_workqueue(hdev->cq_wq); + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + flush_workqueue(hdev->cq_wq[i]); /* Make sure we don't have leftovers in the H/W queues mirror list */ list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list, @@ -499,10 +525,18 @@ static int validate_queue_index(struct hl_device *hdev, struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; + /* This must be checked here to prevent out-of-bounds access to + * hw_queues_props array + */ + if (chunk->queue_index >= asic->max_queues) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; - if ((chunk->queue_index >= HL_MAX_QUEUES) || - (hw_queue_prop->type == QUEUE_TYPE_NA)) { + if (hw_queue_prop->type == QUEUE_TYPE_NA) { dev_err(hdev->dev, "Queue index %d is invalid\n", chunk->queue_index); return -EINVAL; @@ -630,12 +664,15 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = validate_queue_index(hdev, chunk, &queue_type, &is_kernel_allocated_cb); - if (rc) + if (rc) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; goto free_cs_object; + } if (is_kernel_allocated_cb) { cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; rc = -EINVAL; goto free_cs_object; } @@ -649,6 +686,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb); if (!job) { + hpriv->ctx->cs_counters.out_of_mem_drop_cnt++; dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; if (is_kernel_allocated_cb) @@ -681,6 +719,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = cs_parser(hpriv, job); if (rc) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; dev_err(hdev->dev, "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", cs->ctx->asid, cs->sequence, job->id, rc); @@ -689,6 +728,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, } if (int_queues_only) { + hpriv->ctx->cs_counters.parsing_drop_cnt++; dev_err(hdev->dev, "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); @@ -738,6 +778,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; + enum hl_queue_type q_type; u64 *signal_seq_arr = NULL, signal_seq; u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size; int rc; @@ -770,9 +811,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, chunk = &cs_chunk_array[0]; q_idx = chunk->queue_index; hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; + q_type = hw_queue_prop->type; - if ((q_idx >= HL_MAX_QUEUES) || - (hw_queue_prop->type != QUEUE_TYPE_EXT)) { + if ((q_idx >= hdev->asic_prop.max_queues) || + (!hw_queue_prop->supports_sync_stream)) { dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); rc = -EINVAL; goto free_cs_chunk_array; @@ -869,25 +911,28 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, *cs_seq = cs->sequence; - job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); + job = hl_cs_allocate_job(hdev, q_type, true); if (!job) { + ctx->cs_counters.out_of_mem_drop_cnt++; dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; goto put_cs; } - cb = hl_cb_kernel_create(hdev, PAGE_SIZE); + if (cs->type == CS_TYPE_WAIT) + cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); + else + cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); + + cb = hl_cb_kernel_create(hdev, cb_size, + q_type == QUEUE_TYPE_HW && hdev->mmu_enable); if (!cb) { + ctx->cs_counters.out_of_mem_drop_cnt++; kfree(job); rc = -EFAULT; goto put_cs; } - if (cs->type == CS_TYPE_WAIT) - cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); - else - cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); - job->id = 0; job->cs = cs; job->user_cb = cb; @@ -1126,7 +1171,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev, rc = PTR_ERR(fence); if (rc == -EINVAL) dev_notice_ratelimited(hdev->dev, - "Can't wait on seq %llu because current CS is at seq %llu\n", + "Can't wait on CS %llu because current CS is at seq %llu\n", seq, ctx->cs_sequence); } else if (fence) { rc = dma_fence_wait_timeout(fence, true, timeout); @@ -1159,15 +1204,21 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) memset(args, 0, sizeof(*args)); if (rc < 0) { - dev_err_ratelimited(hdev->dev, - "Error %ld on waiting for CS handle %llu\n", - rc, seq); if (rc == -ERESTARTSYS) { + dev_err_ratelimited(hdev->dev, + "user process got signal while waiting for CS handle %llu\n", + seq); args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; rc = -EINTR; } else if (rc == -ETIMEDOUT) { + dev_err_ratelimited(hdev->dev, + "CS %llu has timed-out while user process is waiting for it\n", + seq); args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; } else if (rc == -EIO) { + dev_err_ratelimited(hdev->dev, + "CS %llu has been aborted while user process is waiting for it\n", + seq); args->out.status = HL_WAIT_CS_STATUS_ABORTED; } return rc; diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/common/context.c index ec92b3506b1f..3e375958e73b 100644 --- a/drivers/misc/habanalabs/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx) * to this function unless the ref count is 0 */ - for (i = 0 ; i < HL_MAX_PENDING_CS ; i++) + for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++) dma_fence_put(ctx->cs_pending[i]); + kfree(ctx->cs_pending); + if (ctx->asid != HL_KERNEL_ASID_ID) { /* The engines are stopped as there is no executing CS, but the * Coresight might be still working by accessing addresses @@ -110,8 +112,7 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx) return; dev_warn(hdev->dev, - "Context %d closed or terminated but its CS are executing\n", - ctx->asid); + "user process released device but its command submissions are still executing\n"); } int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) @@ -126,34 +127,49 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) spin_lock_init(&ctx->cs_lock); atomic_set(&ctx->thread_ctx_switch_token, 1); ctx->thread_ctx_switch_wait_token = 0; + ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs, + sizeof(struct dma_fence *), + GFP_KERNEL); + if (!ctx->cs_pending) + return -ENOMEM; if (is_kernel_ctx) { ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ rc = hl_mmu_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "Failed to init mmu ctx module\n"); - goto mem_ctx_err; + goto err_free_cs_pending; } } else { ctx->asid = hl_asid_alloc(hdev); if (!ctx->asid) { dev_err(hdev->dev, "No free ASID, failed to create context\n"); - return -ENOMEM; + rc = -ENOMEM; + goto err_free_cs_pending; } rc = hl_vm_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "Failed to init mem ctx module\n"); rc = -ENOMEM; - goto mem_ctx_err; + goto err_asid_free; + } + + rc = hdev->asic_funcs->ctx_init(ctx); + if (rc) { + dev_err(hdev->dev, "ctx_init failed\n"); + goto err_vm_ctx_fini; } } return 0; -mem_ctx_err: - if (ctx->asid != HL_KERNEL_ASID_ID) - hl_asid_free(hdev, ctx->asid); +err_vm_ctx_fini: + hl_vm_ctx_fini(ctx); +err_asid_free: + hl_asid_free(hdev, ctx->asid); +err_free_cs_pending: + kfree(ctx->cs_pending); return rc; } @@ -170,6 +186,7 @@ int hl_ctx_put(struct hl_ctx *ctx) struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) { + struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop; struct dma_fence *fence; spin_lock(&ctx->cs_lock); @@ -179,13 +196,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) return ERR_PTR(-EINVAL); } - if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) { + if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) { spin_unlock(&ctx->cs_lock); return NULL; } fence = dma_fence_get( - ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]); + ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]); spin_unlock(&ctx->cs_lock); return fence; diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index fc4372c18ce2..fc4372c18ce2 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/common/device.c index 2b38a119704c..9919ff121067 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -249,7 +249,8 @@ static void device_cdev_sysfs_del(struct hl_device *hdev) */ static int device_early_init(struct hl_device *hdev) { - int rc; + int i, rc; + char workq_name[32]; switch (hdev->asic_type) { case ASIC_GOYA: @@ -274,11 +275,24 @@ static int device_early_init(struct hl_device *hdev) if (rc) goto early_fini; - hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0); - if (hdev->cq_wq == NULL) { - dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); - rc = -ENOMEM; - goto asid_fini; + if (hdev->asic_prop.completion_queues_count) { + hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, + sizeof(*hdev->cq_wq), + GFP_ATOMIC); + if (!hdev->cq_wq) { + rc = -ENOMEM; + goto asid_fini; + } + } + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { + snprintf(workq_name, 32, "hl-free-jobs-%u", i); + hdev->cq_wq[i] = create_singlethread_workqueue(workq_name); + if (hdev->cq_wq == NULL) { + dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); + rc = -ENOMEM; + goto free_cq_wq; + } } hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0); @@ -321,7 +335,10 @@ free_chip_info: free_eq_wq: destroy_workqueue(hdev->eq_wq); free_cq_wq: - destroy_workqueue(hdev->cq_wq); + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + if (hdev->cq_wq[i]) + destroy_workqueue(hdev->cq_wq[i]); + kfree(hdev->cq_wq); asid_fini: hl_asid_fini(hdev); early_fini: @@ -339,6 +356,8 @@ early_fini: */ static void device_early_fini(struct hl_device *hdev) { + int i; + mutex_destroy(&hdev->mmu_cache_lock); mutex_destroy(&hdev->debug_lock); mutex_destroy(&hdev->send_cpu_message_lock); @@ -351,7 +370,10 @@ static void device_early_fini(struct hl_device *hdev) kfree(hdev->hl_chip_info); destroy_workqueue(hdev->eq_wq); - destroy_workqueue(hdev->cq_wq); + + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + destroy_workqueue(hdev->cq_wq[i]); + kfree(hdev->cq_wq); hl_asid_fini(hdev); @@ -838,6 +860,22 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, if (rc) return 0; + if (hard_reset) { + /* Disable PCI access from device F/W so he won't send + * us additional interrupts. We disable MSI/MSI-X at + * the halt_engines function and we can't have the F/W + * sending us interrupts after that. We need to disable + * the access here because if the device is marked + * disable, the message won't be send. Also, in case + * of heartbeat, the device CPU is marked as disable + * so this message won't be sent + */ + if (hl_fw_send_pci_access_msg(hdev, + ARMCP_PACKET_DISABLE_PCI_ACCESS)) + dev_warn(hdev->dev, + "Failed to disable PCI access by F/W\n"); + } + /* This also blocks future CS/VM/JOB completion operations */ hdev->disabled = true; @@ -995,6 +1033,12 @@ again: } } + /* Device is now enabled as part of the initialization requires + * communication with the device firmware to get information that + * is required for the initialization itself + */ + hdev->disabled = false; + rc = hdev->asic_funcs->hw_init(hdev); if (rc) { dev_err(hdev->dev, @@ -1002,8 +1046,6 @@ again: goto out_err; } - hdev->disabled = false; - /* Check that the communication with the device is working */ rc = hdev->asic_funcs->test_queues(hdev); if (rc) { @@ -1144,14 +1186,17 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) * because there the addresses of the completion queues are being * passed as arguments to request_irq */ - hdev->completion_queue = kcalloc(cq_cnt, - sizeof(*hdev->completion_queue), - GFP_KERNEL); + if (cq_cnt) { + hdev->completion_queue = kcalloc(cq_cnt, + sizeof(*hdev->completion_queue), + GFP_KERNEL); - if (!hdev->completion_queue) { - dev_err(hdev->dev, "failed to allocate completion queues\n"); - rc = -ENOMEM; - goto hw_queues_destroy; + if (!hdev->completion_queue) { + dev_err(hdev->dev, + "failed to allocate completion queues\n"); + rc = -ENOMEM; + goto hw_queues_destroy; + } } for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) { @@ -1162,6 +1207,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) "failed to initialize completion queue\n"); goto cq_fini; } + hdev->completion_queue[i].cq_idx = i; } /* @@ -1219,6 +1265,12 @@ int hl_device_init(struct hl_device *hdev, struct class |