author    Linus Torvalds <torvalds@linux-foundation.org>  2019-05-07 18:14:36 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2019-05-07 18:14:36 -0700
commit    67a242223958d628f0ba33283668e3ddd192d057 (patch)
tree      a39e7039e9a2ef9ab46f8ba561175dbdc6101d11 /block
parent    8b35ad6232c462b02e397e87ce702bcddd4ba543 (diff)
parent    b8753433fc611e23e31300e1d099001a08955c88 (diff)
Merge tag 'for-5.2/block-20190507' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "Nothing major in this series, just fixes and improvements all over the
  map. This contains:

   - Series of fixes for sed-opal (David, Jonas)

   - Fixes and performance tweaks for BFQ (via Paolo)

   - Set of fixes for bcache (via Coly)

   - Set of fixes for md (via Song)

   - Enabling multi-page for passthrough requests (Ming)

   - Queue release fix series (Ming)

   - Device notification improvements (Martin)

   - Propagate underlying device rotational status in loop (Holger)

   - Removal of mtip32xx trim support, which has been disabled for years
     (Christoph)

   - Improvement and cleanup of nvme command handling (Christoph)

   - Add block SPDX tags (Christoph)

   - Cleanup/hardening of bio/bvec iteration (Christoph)

   - A few NVMe pull requests (Christoph)

   - Removal of CONFIG_LBDAF (Christoph)

   - Various little fixes here and there"

* tag 'for-5.2/block-20190507' of git://git.kernel.dk/linux-block: (164 commits)
  block: fix mismerge in bvec_advance
  block: don't drain in-progress dispatch in blk_cleanup_queue()
  blk-mq: move cancel of hctx->run_work into blk_mq_hw_sysfs_release
  blk-mq: always free hctx after request queue is freed
  blk-mq: split blk_mq_alloc_and_init_hctx into two parts
  blk-mq: free hw queue's resource in hctx's release handler
  blk-mq: move cancel of requeue_work into blk_mq_release
  blk-mq: grab .q_usage_counter when queuing request from plug code path
  block: fix function name in comment
  nvmet: protect discovery change log event list iteration
  nvme: mark nvme_core_init and nvme_core_exit static
  nvme: move command size checks to the core
  nvme-fabrics: check more command sizes
  nvme-pci: check more command sizes
  nvme-pci: remove an unneeded variable initialization
  nvme-pci: unquiesce admin queue on shutdown
  nvme-pci: shutdown on timeout during deletion
  nvme-pci: fix psdt field for single segment sgls
  nvme-multipath: don't print ANA group state by default
  nvme-multipath: split bios with the ns_head bio_set before submitting
  ...
Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig                   24
-rw-r--r--  block/badblocks.c               10
-rw-r--r--  block/bfq-cgroup.c              16
-rw-r--r--  block/bfq-iosched.c            811
-rw-r--r--  block/bfq-iosched.h            107
-rw-r--r--  block/bfq-wf2q.c                23
-rw-r--r--  block/bio-integrity.c           16
-rw-r--r--  block/bio.c                    286
-rw-r--r--  block/blk-cgroup.c               1
-rw-r--r--  block/blk-core.c                24
-rw-r--r--  block/blk-exec.c                 1
-rw-r--r--  block/blk-flush.c                3
-rw-r--r--  block/blk-integrity.c           16
-rw-r--r--  block/blk-iolatency.c            1
-rw-r--r--  block/blk-merge.c              147
-rw-r--r--  block/blk-mq-cpumap.c            1
-rw-r--r--  block/blk-mq-debugfs.c          13
-rw-r--r--  block/blk-mq-pci.c              10
-rw-r--r--  block/blk-mq-rdma.c             10
-rw-r--r--  block/blk-mq-sched.c            13
-rw-r--r--  block/blk-mq-sysfs.c             9
-rw-r--r--  block/blk-mq-tag.c               1
-rw-r--r--  block/blk-mq-virtio.c           10
-rw-r--r--  block/blk-mq.c                 192
-rw-r--r--  block/blk-mq.h                   2
-rw-r--r--  block/blk-rq-qos.c               2
-rw-r--r--  block/blk-rq-qos.h               1
-rw-r--r--  block/blk-settings.c            17
-rw-r--r--  block/blk-stat.c                 1
-rw-r--r--  block/blk-sysfs.c               31
-rw-r--r--  block/blk-timeout.c              1
-rw-r--r--  block/blk-wbt.c                  1
-rw-r--r--  block/blk-zoned.c                1
-rw-r--r--  block/blk.h                      2
-rw-r--r--  block/bounce.c                   3
-rw-r--r--  block/bsg-lib.c                 16
-rw-r--r--  block/bsg.c                      9
-rw-r--r--  block/elevator.c                 7
-rw-r--r--  block/genhd.c                   68
-rw-r--r--  block/ioctl.c                    1
-rw-r--r--  block/ioprio.c                   1
-rw-r--r--  block/kyber-iosched.c           13
-rw-r--r--  block/mq-deadline.c              1
-rw-r--r--  block/opal_proto.h              12
-rw-r--r--  block/partition-generic.c        7
-rw-r--r--  block/partitions/acorn.c         7
-rw-r--r--  block/partitions/aix.h           1
-rw-r--r--  block/partitions/amiga.h         1
-rw-r--r--  block/partitions/efi.c          16
-rw-r--r--  block/partitions/efi.h          16
-rw-r--r--  block/partitions/ibm.h           1
-rw-r--r--  block/partitions/karma.h         1
-rw-r--r--  block/partitions/ldm.c          16
-rw-r--r--  block/partitions/ldm.h          16
-rw-r--r--  block/partitions/msdos.h         1
-rw-r--r--  block/partitions/osf.h           1
-rw-r--r--  block/partitions/sgi.h           1
-rw-r--r--  block/partitions/sun.h           1
-rw-r--r--  block/partitions/sysv68.h        1
-rw-r--r--  block/partitions/ultrix.h        1
-rw-r--r--  block/scsi_ioctl.c              16
-rw-r--r--  block/sed-opal.c               726
-rw-r--r--  block/t10-pi.c                  19
63 files changed, 1495 insertions, 1289 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 028bc085dac8..1b220101a9cb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,30 +26,6 @@ menuconfig BLOCK
if BLOCK
-config LBDAF
- bool "Support for large (2TB+) block devices and files"
- depends on !64BIT
- default y
- help
- Enable block devices or files of size 2TB and larger.
-
- This option is required to support the full capacity of large
- (2TB+) block devices, including RAID, disk, Network Block Device,
- Logical Volume Manager (LVM) and loopback.
-
- This option also enables support for single files larger than
- 2TB.
-
- The ext4 filesystem requires that this feature be enabled in
- order to support filesystems that have the huge_file feature
- enabled. Otherwise, it will refuse to mount in the read-write
- mode any filesystems that use the huge_file feature, which is
- enabled by default by mke2fs.ext4.
-
- The GFS2 filesystem also requires this feature.
-
- If unsure, say Y.
-
config BLK_SCSI_REQUEST
bool
diff --git a/block/badblocks.c b/block/badblocks.c
index 91f7bcf979d3..2e5f5697db35 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -1,18 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Bad block management
*
* - Heavily based on MD badblocks code from Neil Brown
*
* Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/badblocks.h>
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index c6113af31960..b3796a40a61a 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* cgroups support for the BFQ I/O scheduler.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
*/
#include <linux/module.h>
#include <linux/slab.h>
@@ -578,7 +569,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqg_and_blkg_get(bfqg);
if (bfq_bfqq_busy(bfqq)) {
- bfq_pos_tree_add_move(bfqd, bfqq);
+ if (unlikely(!bfqd->nonrot_with_queueing))
+ bfq_pos_tree_add_move(bfqd, bfqq);
bfq_activate_bfqq(bfqd, bfqq);
}
@@ -1102,7 +1094,7 @@ struct cftype bfq_blkcg_legacy_files[] = {
},
#endif /* CONFIG_DEBUG_BLK_CGROUP */
- /* the same statictics which cover the bfqg and its descendants */
+ /* the same statistics which cover the bfqg and its descendants */
{
.name = "bfq.io_service_bytes_recursive",
.private = (unsigned long)&blkcg_policy_bfq,
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 5ba1e0d841b4..f8d430f88d25 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Budget Fair Queueing (BFQ) I/O scheduler.
*
@@ -12,16 +13,6 @@
*
* Copyright (C) 2017 Paolo Valente <paolo.valente@linaro.org>
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
* BFQ is a proportional-share I/O scheduler, with some extra
* low-latency capabilities. BFQ also supports full hierarchical
* scheduling through cgroups. Next paragraphs provide an introduction
@@ -189,7 +180,7 @@ static const int bfq_default_max_budget = 16 * 1024;
/*
* When a sync request is dispatched, the queue that contains that
* request, and all the ancestor entities of that queue, are charged
- * with the number of sectors of the request. In constrast, if the
+ * with the number of sectors of the request. In contrast, if the
* request is async, then the queue and its ancestor entities are
* charged with the number of sectors of the request, multiplied by
* the factor below. This throttles the bandwidth for async I/O,
@@ -217,7 +208,7 @@ const int bfq_timeout = HZ / 8;
* queue merging.
*
* As can be deduced from the low time limit below, queue merging, if
- * successful, happens at the very beggining of the I/O of the involved
+ * successful, happens at the very beginning of the I/O of the involved
* cooperating processes, as a consequence of the arrival of the very
* first requests from each cooperator. After that, there is very
* little chance to find cooperators.
@@ -242,6 +233,14 @@ static struct kmem_cache *bfq_pool;
blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT))
#define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 19)
+/*
+ * Sync random I/O is likely to be confused with soft real-time I/O,
+ * because it is characterized by limited throughput and apparently
+ * isochronous arrival pattern. To avoid false positives, queues
+ * containing only random (seeky) I/O are prevented from being tagged
+ * as soft real-time.
+ */
+#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history == -1)
/* Min number of samples required to perform peak-rate update */
#define BFQ_RATE_MIN_SAMPLES 32
@@ -433,7 +432,7 @@ void bfq_schedule_dispatch(struct bfq_data *bfqd)
/*
* Lifted from AS - choose which of rq1 and rq2 that is best served now.
- * We choose the request that is closesr to the head right now. Distance
+ * We choose the request that is closer to the head right now. Distance
* behind the head is penalized and only allowed to a certain extent.
*/
static struct request *bfq_choose_req(struct bfq_data *bfqd,
@@ -595,7 +594,16 @@ static bool bfq_too_late_for_merging(struct bfq_queue *bfqq)
bfq_merge_time_limit);
}
-void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+/*
+ * The following function is not marked as __cold because it is
+ * actually cold, but for the same performance goal described in the
+ * comments on the likely() at the beginning of
+ * bfq_setup_cooperator(). Unexpectedly, to reach an even lower
+ * execution time for the case where this function is not invoked, we
+ * had to add an unlikely() in each involved if().
+ */
+void __cold
+bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
struct rb_node **p, *parent;
struct bfq_queue *__bfqq;
@@ -629,12 +637,19 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
}
/*
- * The following function returns true if every queue must receive the
- * same share of the throughput (this condition is used when deciding
- * whether idling may be disabled, see the comments in the function
- * bfq_better_to_idle()).
+ * The following function returns false either if every active queue
+ * must receive the same share of the throughput (symmetric scenario),
+ * or, as a special case, if bfqq must receive a share of the
+ * throughput lower than or equal to the share that every other active
+ * queue must receive. If bfqq does sync I/O, then these are the only
+ * two cases where bfqq happens to be guaranteed its share of the
+ * throughput even if I/O dispatching is not plugged when bfqq remains
+ * temporarily empty (for more details, see the comments in the
+ * function bfq_better_to_idle()). For this reason, the return value
+ * of this function is used to check whether I/O-dispatch plugging can
+ * be avoided.
*
- * Such a scenario occurs when:
+ * The above first case (symmetric scenario) occurs when:
* 1) all active queues have the same weight,
* 2) all active queues belong to the same I/O-priority class,
* 3) all active groups at the same level in the groups tree have the same
@@ -654,30 +669,36 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
* support or the cgroups interface are not enabled, thus no state
* needs to be maintained in this case.
*/
-static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+static bool bfq_asymmetric_scenario(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
{
+ bool smallest_weight = bfqq &&
+ bfqq->weight_counter &&
+ bfqq->weight_counter ==
+ container_of(
+ rb_first_cached(&bfqd->queue_weights_tree),
+ struct bfq_weight_counter,
+ weights_node);
+
/*
* For queue weights to differ, queue_weights_tree must contain
* at least two nodes.
*/
- bool varied_queue_weights = !RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
- (bfqd->queue_weights_tree.rb_node->rb_left ||
- bfqd->queue_weights_tree.rb_node->rb_right);
+ bool varied_queue_weights = !smallest_weight &&
+ !RB_EMPTY_ROOT(&bfqd->queue_weights_tree.rb_root) &&
+ (bfqd->queue_weights_tree.rb_root.rb_node->rb_left ||
+ bfqd->queue_weights_tree.rb_root.rb_node->rb_right);
bool multiple_classes_busy =
(bfqd->busy_queues[0] && bfqd->busy_queues[1]) ||
(bfqd->busy_queues[0] && bfqd->busy_queues[2]) ||
(bfqd->busy_queues[1] && bfqd->busy_queues[2]);
- /*
- * For queue weights to differ, queue_weights_tree must contain
- * at least two nodes.
- */
- return !(varied_queue_weights || multiple_classes_busy
+ return varied_queue_weights || multiple_classes_busy
#ifdef CONFIG_BFQ_GROUP_IOSCHED
|| bfqd->num_groups_with_pending_reqs > 0
#endif
- );
+ ;
}
/*
@@ -694,10 +715,11 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
* should be low too.
*/
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
- struct rb_root *root)
+ struct rb_root_cached *root)
{
struct bfq_entity *entity = &bfqq->entity;
- struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
+ bool leftmost = true;
/*
* Do not insert if the queue is already associated with a
@@ -726,8 +748,10 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
if (entity->weight < __counter->weight)
new = &((*new)->rb_left);
- else
+ else {
new = &((*new)->rb_right);
+ leftmost = false;
+ }
}
bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
@@ -736,7 +760,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
/*
* In the unlucky event of an allocation failure, we just
* exit. This will cause the weight of queue to not be
- * considered in bfq_symmetric_scenario, which, in its turn,
+ * considered in bfq_asymmetric_scenario, which, in its turn,
* causes the scenario to be deemed wrongly symmetric in case
* bfqq's weight would have been the only weight making the
* scenario asymmetric. On the bright side, no unbalance will
@@ -750,7 +774,8 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqq->weight_counter->weight = entity->weight;
rb_link_node(&bfqq->weight_counter->weights_node, parent, new);
- rb_insert_color(&bfqq->weight_counter->weights_node, root);
+ rb_insert_color_cached(&bfqq->weight_counter->weights_node, root,
+ leftmost);
inc_counter:
bfqq->weight_counter->num_active++;
@@ -765,7 +790,7 @@ inc_counter:
*/
void __bfq_weights_tree_remove(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
- struct rb_root *root)
+ struct rb_root_cached *root)
{
if (!bfqq->weight_counter)
return;
@@ -774,7 +799,7 @@ void __bfq_weights_tree_remove(struct bfq_data *bfqd,
if (bfqq->weight_counter->num_active > 0)
goto reset_entity_pointer;
- rb_erase(&bfqq->weight_counter->weights_node, root);
+ rb_erase_cached(&bfqq->weight_counter->weights_node, root);
kfree(bfqq->weight_counter);
reset_entity_pointer:
@@ -889,7 +914,7 @@ static unsigned long bfq_serv_to_charge(struct request *rq,
struct bfq_queue *bfqq)
{
if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1 ||
- !bfq_symmetric_scenario(bfqq->bfqd))
+ bfq_asymmetric_scenario(bfqq->bfqd, bfqq))
return blk_rq_sectors(rq);
return blk_rq_sectors(rq) * bfq_async_charge_factor;
@@ -955,7 +980,7 @@ static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
* of several files
* mplayer took 23 seconds to start, if constantly weight-raised.
*
- * As for higher values than that accomodating the above bad
+ * As for higher values than that accommodating the above bad
* scenario, tests show that higher values would often yield
* the opposite of the desired result, i.e., would worsen
* responsiveness by allowing non-interactive applications to
@@ -994,6 +1019,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
else
bfq_clear_bfqq_IO_bound(bfqq);
+ bfqq->entity.new_weight = bic->saved_weight;
bfqq->ttime = bic->saved_ttime;
bfqq->wr_coeff = bic->saved_wr_coeff;
bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt;
@@ -1041,8 +1067,18 @@ static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
hlist_for_each_entry_safe(item, n, &bfqd->burst_list, burst_list_node)
hlist_del_init(&item->burst_list_node);
- hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
- bfqd->burst_size = 1;
+
+ /*
+ * Start the creation of a new burst list only if there is no
+ * active queue. See comments on the conditional invocation of
+ * bfq_handle_burst().
+ */
+ if (bfq_tot_busy_queues(bfqd) == 0) {
+ hlist_add_head(&bfqq->burst_list_node, &bfqd->burst_list);
+ bfqd->burst_size = 1;
+ } else
+ bfqd->burst_size = 0;
+
bfqd->burst_parent_entity = bfqq->entity.parent;
}
@@ -1098,7 +1134,8 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
* many parallel threads/processes. Examples are systemd during boot,
* or git grep. To help these processes get their job done as soon as
* possible, it is usually better to not grant either weight-raising
- * or device idling to their queues.
+ * or device idling to their queues, unless these queues must be
+ * protected from the I/O flowing through other active queues.
*
* In this comment we describe, firstly, the reasons why this fact
* holds, and, secondly, the next function, which implements the main
@@ -1110,7 +1147,10 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
* cumulatively served, the sooner the target job of these queues gets
* completed. As a consequence, weight-raising any of these queues,
* which also implies idling the device for it, is almost always
- * counterproductive. In most cases it just lowers throughput.
+ * counterproductive, unless there are other active queues to isolate
+ * these new queues from. If there are no other active queues, then
+ * weight-raising these new queues just lowers throughput in most
+ * cases.
*
* On the other hand, a burst of queue creations may be caused also by
* the start of an application that does not consist of a lot of
@@ -1144,14 +1184,16 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
* are very rare. They typically occur if some service happens to
* start doing I/O exactly when the interactive task starts.
*
- * Turning back to the next function, it implements all the steps
- * needed to detect the occurrence of a large burst and to properly
- * mark all the queues belonging to it (so that they can then be
- * treated in a different way). This goal is achieved by maintaining a
- * "burst list" that holds, temporarily, the queues that belong to the
- * burst in progress. The list is then used to mark these queues as
- * belonging to a large burst if the burst does become large. The main
- * steps are the following.
+ * Turning back to the next function, it is invoked only if there are
+ * no active queues (apart from active queues that would belong to the
+ * same, possible burst bfqq would belong to), and it implements all
+ * the steps needed to detect the occurrence of a large burst and to
+ * properly mark all the queues belonging to it (so that they can then
+ * be treated in a different way). This goal is achieved by
+ * maintaining a "burst list" that holds, temporarily, the queues that
+ * belong to the burst in progress. The list is then used to mark
+ * these queues as belonging to a large burst if the burst does become
+ * large. The main steps are the following.
*
* . when the very first queue is created, the queue is inserted into the
* list (as it could be the first queue in a possible burst)
@@ -1596,6 +1638,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
*/
in_burst = bfq_bfqq_in_large_burst(bfqq);
soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
+ !BFQQ_TOTALLY_SEEKY(bfqq) &&
!in_burst &&
time_is_before_jiffies(bfqq->soft_rt_next_start) &&
bfqq->dispatched == 0;
@@ -1704,6 +1747,123 @@ static void bfq_add_request(struct request *rq)
bfqq->queued[rq_is_sync(rq)]++;
bfqd->queued++;
+ if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
+ /*
+ * Periodically reset inject limit, to make sure that
+ * the latter eventually drops in case workload
+ * changes, see step (3) in the comments on
+ * bfq_update_inject_limit().
+ */
+ if (time_is_before_eq_jiffies(bfqq->decrease_time_jif +
+ msecs_to_jiffies(1000))) {
+ /* invalidate baseline total service time */
+ bfqq->last_serv_time_ns = 0;
+
+ /*
+ * Reset pointer in case we are waiting for
+ * some request completion.
+ */
+ bfqd->waited_rq = NULL;
+
+ /*
+ * If bfqq has a short think time, then start
+ * by setting the inject limit to 0
+ * prudentially, because the service time of
+ * an injected I/O request may be higher than
+ * the think time of bfqq, and therefore, if
+ * one request was injected when bfqq remains
+ * empty, this injected request might delay
+ * the service of the next I/O request for
+ * bfqq significantly. In case bfqq can
+ * actually tolerate some injection, then the
+ * adaptive update will however raise the
+ * limit soon. This lucky circumstance holds
+ * exactly because bfqq has a short think
+ * time, and thus, after remaining empty, is
+ * likely to get new I/O enqueued---and then
+ * completed---before being expired. This is
+ * the very pattern that gives the
+ * limit-update algorithm the chance to
+ * measure the effect of injection on request
+ * service times, and then to update the limit
+ * accordingly.
+ *
+ * On the opposite end, if bfqq has a long
+ * think time, then start directly by 1,
+ * because:
+ * a) on the bright side, keeping at most one
+ * request in service in the drive is unlikely
+ * to cause any harm to the latency of bfqq's
+ * requests, as the service time of a single
+ * request is likely to be lower than the
+ * think time of bfqq;
+ * b) on the downside, after becoming empty,
+ * bfqq is likely to expire before getting its
+ * next request. With this request arrival
+ * pattern, it is very hard to sample total
+ * service times and update the inject limit
+ * accordingly (see comments on
+ * bfq_update_inject_limit()). So the limit is
+ * likely to be never, or at least seldom,
+ * updated. As a consequence, by setting the
+ * limit to 1, we avoid that no injection ever
+ * occurs with bfqq. On the downside, this
+ * proactive step further reduces chances to
+ * actually compute the baseline total service
+ * time. Thus it reduces chances to execute the
+ * limit-update algorithm and possibly raise the
+ * limit to more than 1.
+ */
+ if (bfq_bfqq_has_short_ttime(bfqq))
+ bfqq->inject_limit = 0;