summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/debugfs.c
diff options
context:
space:
mode:
author Oded Gabbay <oded.gabbay@gmail.com> 2019-02-28 10:46:12 +0200
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-02-28 13:04:59 +0100
commit a28ce422a6d926c11d7e72a83ccaa4f9b06077ea (patch)
tree 65f08f69e8eec0cf7d5272a87e2e612f975c04ec /drivers/misc/habanalabs/debugfs.c
parent 27ca384cb7c44b8b16ea43f9aed930664140360e (diff)
habanalabs: disable CPU access on timeouts
This patch provides a workaround for a bug in the F/W where the response to a request from KMD may take more than 100ms. This could cause the queue between KMD and the F/W to get out of sync. The workaround (WA) is to: 1. Increase the timeout of ALL requests to 1s. 2. In case a request isn't answered in time, mark the state as "cpu_disabled" and prevent sending further requests from KMD to the F/W. This will eventually lead to a heartbeat failure and hard reset of the device. Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc/habanalabs/debugfs.c')
-rw-r--r--drivers/misc/habanalabs/debugfs.c6
1 files changed, 4 insertions, 2 deletions
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index f472b572faea..1d2bbcf90f16 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -723,7 +723,7 @@ static ssize_t hl_device_read(struct file *f, char __user *buf,
return 0;
sprintf(tmp_buf,
- "Valid values are: disable, enable, suspend, resume\n");
+ "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
strlen(tmp_buf) + 1);
@@ -751,9 +751,11 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf,
hdev->asic_funcs->suspend(hdev);
} else if (strncmp("resume", data, strlen("resume")) == 0) {
hdev->asic_funcs->resume(hdev);
+ } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
+ hdev->device_cpu_disabled = true;
} else {
dev_err(hdev->dev,
- "Valid values are: disable, enable, suspend, resume\n");
+ "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
count = -EINVAL;
}