summaryrefslogtreecommitdiffstats
path: root/drivers/misc/habanalabs/command_submission.c
diff options
context:
space:
mode:
authorOfir Bitton <obitton@habana.ai>2020-06-02 12:28:27 +0300
committerOded Gabbay <oded.gabbay@gmail.com>2020-07-24 20:31:34 +0300
commitc16d45f42b64e91895f4bc1cf19febeb5e0c52b6 (patch)
tree0e248646613ecbb8fa69681dfc7d835c59dfcb6d /drivers/misc/habanalabs/command_submission.c
parent0b168c8f1d21f87003fb28b4c87c32335d7fc94b (diff)
habanalabs: Use pending CS amount per ASIC
Training schemes requires much more concurrent command submissions than inference does. In addition, training command submissions can be completed in a non serialized manner. Hence, we add support in which each ASIC will be able to configure the amount of concurrent pending command submissions, rather than use a predefined amount. This change will enhance performance by allowing the user to add more concurrent work without waiting for the previous work to be completed. Signed-off-by: Ofir Bitton <obitton@habana.ai> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers/misc/habanalabs/command_submission.c')
-rw-r--r--drivers/misc/habanalabs/command_submission.c6
1 files changed, 4 insertions, 2 deletions
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index b0f62cbbdc87..e99c1d126bd3 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -418,7 +418,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
spin_lock(&ctx->cs_lock);
cs_cmpl->cs_seq = ctx->cs_sequence;
- other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
+ other = ctx->cs_pending[cs_cmpl->cs_seq &
+ (hdev->asic_prop.max_pending_cs - 1)];
if ((other) && (!dma_fence_is_signaled(other))) {
spin_unlock(&ctx->cs_lock);
dev_dbg(hdev->dev,
@@ -432,7 +433,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs->sequence = cs_cmpl->cs_seq;
- ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
+ ctx->cs_pending[cs_cmpl->cs_seq &
+ (hdev->asic_prop.max_pending_cs - 1)] =
&cs_cmpl->base_fence;
ctx->cs_sequence++;