author     Ming Lei <ming.lei@redhat.com>                    2020-09-10 15:50:56 +0800
committer  Martin K. Petersen <martin.petersen@oracle.com>  2020-09-15 22:20:11 -0400
commit     ed5dd6a67d5eac5fb8873697b55dc1699752a9f3 (patch)
tree       f2b12578e83f2b256c0dbf532741ee1f41d110d3
parent     f97e6e1eabbfed0ec3ccce7562df26a5b21d0d99 (diff)
scsi: core: Only re-run queue in scsi_end_request() if device queue is busy
The request queue is currently run unconditionally in scsi_end_request() if
both the target queue and the host queue are ready.

Recently Long Li reported that the cost of a queue run can be very heavy at
high queue depth. Improve this situation by only running the request queue
when this LUN is busy.

Link: https://lore.kernel.org/r/20200910075056.36509-1-ming.lei@redhat.com
Reported-by: Long Li <longli@microsoft.com>
Tested-by: Long Li <longli@microsoft.com>
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
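For orientation, here is a minimal user-space model of the handshake the patch
below introduces between the budget path and the completion path. This is an
illustrative sketch, not kernel code: C11 atomics with seq_cst ordering stand
in for atomic_t and the smp_mb() pairing, and the names budget_get(),
complete_cmd(), run_queue() and QUEUE_DEPTH are invented for the example.

/* Illustrative model only -- not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define QUEUE_DEPTH 2                            /* invented per-LUN budget */

static atomic_int device_busy;                   /* commands in flight on the LUN */
static atomic_int restarts;                      /* recorded budget contention */

static void run_queue(const char *who)
{
        printf("queue re-run triggered by %s\n", who);
}

/* Budget side: rough analogue of scsi_mq_get_budget(). */
static bool budget_get(void)
{
        if (atomic_fetch_add(&device_busy, 1) < QUEUE_DEPTH)
                return true;                     /* got budget */
        atomic_fetch_sub(&device_busy, 1);       /* LUN is full, back off */

        /*
         * Record the contention *before* re-reading device_busy; seq_cst
         * here plays the role of atomic_inc() + smp_mb__after_atomic().
         */
        atomic_fetch_add(&restarts, 1);
        if (atomic_load(&device_busy) == 0)
                run_queue("budget side");        /* all completions already ran */
        return false;
}

/* Completion side: rough analogue of scsi_end_request() ->
 * scsi_run_queue_async(). */
static void complete_cmd(void)
{
        atomic_fetch_sub(&device_busy, 1);       /* scsi_device_unbusy() */

        int old = atomic_load(&restarts);

        /* Of many concurrent completions, only the cmpxchg winner re-runs. */
        if (old && atomic_compare_exchange_strong(&restarts, &old, 0))
                run_queue("completion side");
}

int main(void)
{
        atomic_store(&device_busy, QUEUE_DEPTH); /* LUN queue is full */
        (void)budget_get();                      /* fails, records contention */
        complete_cmd();                          /* observes restarts, re-runs */
        return 0;
}

Whichever way the two sides interleave, either the completion side observes a
non-zero restarts count or the budget side observes device_busy == 0, so a
request that failed to get budget is always dispatched again. The cmpxchg also
means that of many concurrent completions only one pays for the queue run,
which is where the saving at high queue depth comes from.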
-rw-r--r--  drivers/scsi/scsi_lib.c      48
-rw-r--r--  include/scsi/scsi_device.h    1
2 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7c6dd6f75190..f0ee11dc07e4 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -549,10 +549,27 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
 static void scsi_run_queue_async(struct scsi_device *sdev)
 {
         if (scsi_target(sdev)->single_lun ||
-            !list_empty(&sdev->host->starved_list))
+            !list_empty(&sdev->host->starved_list)) {
                 kblockd_schedule_work(&sdev->requeue_work);
-        else
-                blk_mq_run_hw_queues(sdev->request_queue, true);
+        } else {
+                /*
+                 * smp_mb() present in sbitmap_queue_clear() or implied in
+                 * .end_io is for ordering writing .device_busy in
+                 * scsi_device_unbusy() and reading sdev->restarts.
+                 */
+                int old = atomic_read(&sdev->restarts);
+
+                /*
+                 * ->restarts has to be kept as non-zero if new budget
+                 * contention occurs.
+                 *
+                 * No need to run queue when either another re-run
+                 * queue wins in updating ->restarts or a new budget
+                 * contention occurs.
+                 */
+                if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old)
+                        blk_mq_run_hw_queues(sdev->request_queue, true);
+        }
 }
 
 /* Returns false when no more bytes to process, true if there are more */
@@ -1612,7 +1629,30 @@ static bool scsi_mq_get_budget(struct request_queue *q)
 {
         struct scsi_device *sdev = q->queuedata;
 
-        return scsi_dev_queue_ready(q, sdev);
+        if (scsi_dev_queue_ready(q, sdev))
+                return true;
+
+        atomic_inc(&sdev->restarts);
+
+        /*
+         * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy).
+         * .restarts must be incremented before .device_busy is read because the
+         * code in scsi_run_queue_async() depends on the order of these operations.
+         */
+        smp_mb__after_atomic();
+
+        /*
+         * If all in-flight requests originated from this LUN are completed
+         * before reading .device_busy, sdev->device_busy will be observed as
+         * zero, then blk_mq_delay_run_hw_queues() will dispatch this request
+         * soon. Otherwise, completion of one of these requests will observe
+         * the .restarts flag, and the request queue will be run for handling
+         * this request, see scsi_end_request().
+         */
+        if (unlikely(atomic_read(&sdev->device_busy) == 0 &&
+                     !scsi_device_blocked(sdev)))
+                blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY);
+        return false;
 }
 
 static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
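The two comment blocks above describe a classic store-buffering pattern: the
budget side stores to .restarts and then loads .device_busy, while the
completion side effectively stores to .device_busy and then loads .restarts,
with a full barrier between each store and load. A lost wakeup would require
both sides to read the other's stale value, which full ordering forbids. Below
is a small pthread litmus sketch of that guarantee, again with seq_cst
standing in for the smp_mb() pairs; the thread bodies and flag names are
invented for the example.

/* Illustrative litmus sketch -- not kernel code. */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int restarts, device_busy, go;
static int budget_saw_busy, completion_saw_restart;

/* Budget side: atomic_inc(&sdev->restarts); smp_mb__after_atomic();
 * then read device_busy. */
static void *budget_side(void *unused)
{
        while (!atomic_load(&go))
                ;                                /* wait for the race to start */
        atomic_fetch_add(&restarts, 1);
        budget_saw_busy = (atomic_load(&device_busy) != 0);
        return NULL;
}

/* Completion side: clear device_busy (scsi_device_unbusy() plus the
 * smp_mb() in the end_io path), then read restarts. */
static void *completion_side(void *unused)
{
        while (!atomic_load(&go))
                ;
        atomic_store(&device_busy, 0);
        completion_saw_restart = (atomic_load(&restarts) != 0);
        return NULL;
}

int main(void)
{
        for (int i = 0; i < 100000; i++) {
                pthread_t a, b;

                atomic_store(&restarts, 0);
                atomic_store(&device_busy, 1);   /* one command in flight */
                budget_saw_busy = completion_saw_restart = 0;
                atomic_store(&go, 0);

                pthread_create(&a, NULL, budget_side, NULL);
                pthread_create(&b, NULL, completion_side, NULL);
                atomic_store(&go, 1);            /* release both threads */
                pthread_join(a, NULL);
                pthread_join(b, NULL);

                /*
                 * A lost wakeup would need both sides to miss each other;
                 * with full ordering at least one side must succeed.
                 */
                assert(!budget_saw_busy || completion_saw_restart);
        }
        printf("no lost wakeup observed\n");
        return 0;
}

The asserted invariant is exactly the one the patch relies on: if the budget
side still saw the device busy, some completion must see the recorded restart
and re-run the queue.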
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index bc5909033d13..1a5c9a3df6d6 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -109,6 +109,7 @@ struct scsi_device {
         atomic_t device_busy;           /* commands actually active on LLDD */
         atomic_t device_blocked;        /* Device returned QUEUE_FULL. */
 
+        atomic_t restarts;
         spinlock_t list_lock;
         struct list_head starved_entry;
         unsigned short queue_depth;     /* How deep of a queue we want */