From a12de1d42d74ef3c80e9fb9a2da94daaef747869 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 27 Sep 2019 15:24:30 +0800 Subject: blk-mq: honor IO scheduler for multiqueue devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a device is using multiple queues, the IO scheduler may be bypassed. This may hurt performance for some slow MQ devices, and it also breaks zoned devices which depend on mq-deadline for respecting the write order in one zone. Don't bypass io scheduler if we have one setup. This patch can double sequential write performance basically on MQ scsi_debug when mq-deadline is applied. Cc: Bart Van Assche Cc: Hannes Reinecke Cc: Dave Chinner Reviewed-by: Javier González Reviewed-by: Damien Le Moal Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 6e3b15f70cd7..5d4bef3c378d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2012,6 +2012,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) } blk_add_rq_to_plug(plug, rq); + } else if (q->elevator) { + blk_mq_sched_insert_request(rq, false, true, true); } else if (plug && !blk_queue_nomerges(q)) { /* * We do limited plugging. If the bio can be merged, do that. @@ -2035,8 +2037,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_try_issue_directly(data.hctx, same_queue_rq, &cookie); } - } else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator && - !data.hctx->dispatch_busy)) { + } else if ((q->nr_hw_queues > 1 && is_sync) || + !data.hctx->dispatch_busy) { blk_mq_try_issue_directly(data.hctx, rq, &cookie); } else { blk_mq_sched_insert_request(rq, false, true, true); -- cgit v1.2.3 From 3154df262db52f1d27e01020e871f4345ec2f9a8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 27 Sep 2019 15:24:31 +0800 Subject: blk-mq: apply normal plugging for HDD Some HDD drive may expose multiple hardware queues, such as MegraRaid. Let's apply the normal plugging for such devices because sequential IO may benefit a lot from plug merging. Cc: Bart Van Assche Cc: Hannes Reinecke Cc: Dave Chinner Reviewed-by: Damien Le Moal Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5d4bef3c378d..ec791156e9cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1992,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) /* bypass scheduler for flush rq */ blk_insert_flush(rq); blk_mq_run_hw_queue(data.hctx, true); - } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) { + } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs || + !blk_queue_nonrot(q))) { /* * Use plugging if we have a ->commit_rqs() hook as well, as * we know the driver uses bd->last in a smart fashion. + * + * Use normal plugging if this disk is slow HDD, as sequential + * IO may benefit a lot from plug merging. */ unsigned int request_count = plug->rq_count; struct request *last = NULL; -- cgit v1.2.3 From dc301025658aef326763765b0bebeaa44569ed20 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 2 Oct 2019 19:23:05 -0700 Subject: block: sed-opal: fix sparse warning: obsolete array init. Fix sparse warning: (missing '=') ../block/sed-opal.c:133:17: warning: obsolete array initializer, use C99 syntax Fixes: ff91064ea37c ("block: sed-opal: check size of shadow mbr") Cc: linux-block@vger.kernel.org Cc: Jonas Rabenstein Cc: David Kozub Reviewed-by: Scott Bauer Reviewed-by: Revanth Rajashekar Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/sed-opal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/sed-opal.c b/block/sed-opal.c index 4e95a9792162..59a6ba2131d5 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -129,7 +129,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = { { 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 }, /* tables */ - [OPAL_TABLE_TABLE] + [OPAL_TABLE_TABLE] = { 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 }, [OPAL_LOCKINGRANGE_GLOBAL] = { 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 }, -- cgit v1.2.3 From a9eb49c964884654dd6394cb6abe7ceb021c9c96 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 2 Oct 2019 19:23:15 -0700 Subject: block: sed-opal: fix sparse warning: convert __be64 data sparse warns about incorrect type when using __be64 data. It is not being converted to CPU-endian but it should be. Fixes these sparse warnings: ../block/sed-opal.c:375:20: warning: incorrect type in assignment (different base types) ../block/sed-opal.c:375:20: expected unsigned long long [usertype] align ../block/sed-opal.c:375:20: got restricted __be64 const [usertype] alignment_granularity ../block/sed-opal.c:376:25: warning: incorrect type in assignment (different base types) ../block/sed-opal.c:376:25: expected unsigned long long [usertype] lowest_lba ../block/sed-opal.c:376:25: got restricted __be64 const [usertype] lowest_aligned_lba Fixes: 455a7b238cd6 ("block: Add Sed-opal library") Cc: Scott Bauer Cc: Rafael Antognolli Cc: linux-block@vger.kernel.org Reviewed-by: Jon Derrick Signed-off-by: Randy Dunlap Signed-off-by: Jens Axboe --- block/sed-opal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/sed-opal.c b/block/sed-opal.c index 59a6ba2131d5..b4c761973ac1 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -372,8 +372,8 @@ static void check_geometry(struct opal_dev *dev, const void *data) { const struct d0_geometry_features *geo = data; - dev->align = geo->alignment_granularity; - dev->lowest_lba = geo->lowest_aligned_lba; + dev->align = be64_to_cpu(geo->alignment_granularity); + dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba); } static int execute_step(struct opal_dev *dev, -- cgit v1.2.3 From b84477d3ebb96294f87dc3161e53fa8fe22d9bfd Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Sat, 5 Oct 2019 11:59:27 -0700 Subject: blk-wbt: fix performance regression in wbt scale_up/scale_down scale_up wakes up waiters after scaling up. But after scaling max, it should not wake up more waiters as waiters will not have anything to do. This patch fixes this by making scale_up (and also scale_down) return when threshold is reached. This bug causes increased fdatasync latency when fdatasync and dd conv=sync are performed in parallel on 4.19 compared to 4.14. This bug was introduced during refactoring of blk-wbt code. Fixes: a79050434b45 ("blk-rq-qos: refactor out common elements of blk-wbt") Cc: stable@vger.kernel.org Cc: Josef Bacik Signed-off-by: Harshad Shirwadkar Signed-off-by: Jens Axboe --- block/blk-rq-qos.c | 14 +++++++++----- block/blk-rq-qos.h | 4 ++-- block/blk-wbt.c | 6 ++++-- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 61b635bc2a31..656460636ad3 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -160,24 +160,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd) return ret; } -void rq_depth_scale_up(struct rq_depth *rqd) +/* Returns true on success and false if scaling up wasn't possible */ +bool rq_depth_scale_up(struct rq_depth *rqd) { /* * Hit max in previous round, stop here */ if (rqd->scaled_max) - return; + return false; rqd->scale_step--; rqd->scaled_max = rq_depth_calc_max_depth(rqd); + return true; } /* * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we - * had a latency violation. + * had a latency violation. Returns true on success and returns false if + * scaling down wasn't possible. */ -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) { /* * Stop scaling down when we've hit the limit. This also prevents @@ -185,7 +188,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) * keep up. */ if (rqd->max_depth == 1) - return; + return false; if (rqd->scale_step < 0 && hard_throttle) rqd->scale_step = 0; @@ -194,6 +197,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); + return true; } struct rq_qos_wait_data { diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 08a09dbe0f4b..e8cb68f6958a 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -130,8 +130,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, cleanup_cb_t *cleanup_cb); bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit); -void rq_depth_scale_up(struct rq_depth *rqd); -void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); +bool rq_depth_scale_up(struct rq_depth *rqd); +bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); bool rq_depth_calc_max_depth(struct rq_depth *rqd); void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 8af553a0ba00..8641ba9793c5 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -308,7 +308,8 @@ static void calc_wb_limits(struct rq_wb *rwb) static void scale_up(struct rq_wb *rwb) { - rq_depth_scale_up(&rwb->rq_depth); + if (!rq_depth_scale_up(&rwb->rq_depth)) + return; calc_wb_limits(rwb); rwb->unknown_cnt = 0; rwb_wake_all(rwb); @@ -317,7 +318,8 @@ static void scale_up(struct rq_wb *rwb) static void scale_down(struct rq_wb *rwb, bool hard_throttle) { - rq_depth_scale_down(&rwb->rq_depth, hard_throttle); + if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle)) + return; calc_wb_limits(rwb); rwb->unknown_cnt = 0; rwb_trace_step(rwb, "scale down"); -- cgit v1.2.3