summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorJani Nikula <jani.nikula@intel.com>2018-11-20 13:14:08 +0200
committerJani Nikula <jani.nikula@intel.com>2018-11-20 13:14:08 +0200
commit2ac5e38ea4203852d6e99edd3cf11f044b0a409f (patch)
tree1ef02da98d56309368ad2b6a4e492bafe5bb4faf /block
parentf48cc647f3e196a3179d695d3c2d56c13e9dec98 (diff)
parent9235dd441af43599b9cdcce599a3da4083fcad3c (diff)
Merge drm/drm-next into drm-intel-next-queued
Pull in v4.20-rc3 via drm-next. Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig10
-rw-r--r--block/Kconfig.iosched3
-rw-r--r--block/Makefile1
-rw-r--r--block/bfq-iosched.c289
-rw-r--r--block/bfq-iosched.h53
-rw-r--r--block/bfq-wf2q.c43
-rw-r--r--block/bio-integrity.c12
-rw-r--r--block/bio.c48
-rw-r--r--block/blk-core.c284
-rw-r--r--block/blk-flush.c6
-rw-r--r--block/blk-integrity.c12
-rw-r--r--block/blk-iolatency.c208
-rw-r--r--block/blk-lib.c61
-rw-r--r--block/blk-merge.c137
-rw-r--r--block/blk-mq-debugfs.c14
-rw-r--r--block/blk-mq-sched.h4
-rw-r--r--block/blk-mq-tag.c69
-rw-r--r--block/blk-mq.c215
-rw-r--r--block/blk-pm.c216
-rw-r--r--block/blk-pm.h69
-rw-r--r--block/blk-settings.c2
-rw-r--r--block/blk-softirq.c5
-rw-r--r--block/blk-stat.c1
-rw-r--r--block/blk-sysfs.c15
-rw-r--r--block/blk-throttle.c41
-rw-r--r--block/blk-wbt.c2
-rw-r--r--block/blk-zoned.c359
-rw-r--r--block/blk.h91
-rw-r--r--block/bounce.c40
-rw-r--r--block/cfq-iosched.c12
-rw-r--r--block/elevator.c22
-rw-r--r--block/genhd.c19
-rw-r--r--block/ioctl.c4
-rw-r--r--block/kyber-iosched.c547
34 files changed, 1830 insertions, 1084 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 1f2469a0123c..f7045aa47edb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -74,7 +74,6 @@ config BLK_DEV_BSG
config BLK_DEV_BSGLIB
bool "Block layer SG support v4 helper lib"
- default n
select BLK_DEV_BSG
select BLK_SCSI_REQUEST
help
@@ -107,7 +106,6 @@ config BLK_DEV_ZONED
config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
depends on BLK_CGROUP=y
- default n
---help---
Block layer bio throttling support. It can be used to limit
the IO rate to a device. IO rate policies are per cgroup and
@@ -119,7 +117,6 @@ config BLK_DEV_THROTTLING
config BLK_DEV_THROTTLING_LOW
bool "Block throttling .low limit interface support (EXPERIMENTAL)"
depends on BLK_DEV_THROTTLING
- default n
---help---
Add .low limit interface for block throttling. The low limit is a best
effort limit to prioritize cgroups. Depending on the setting, the limit
@@ -130,7 +127,6 @@ config BLK_DEV_THROTTLING_LOW
config BLK_CMDLINE_PARSER
bool "Block device command line partition parser"
- default n
---help---
Enabling this option allows you to specify the partition layout from
the kernel boot args. This is typically of use for embedded devices
@@ -141,7 +137,6 @@ config BLK_CMDLINE_PARSER
config BLK_WBT
bool "Enable support for block device writeback throttling"
- default n
---help---
Enabling this option enables the block layer to throttle buffered
background writeback from the VM, making it more smooth and having
@@ -152,7 +147,6 @@ config BLK_WBT
config BLK_CGROUP_IOLATENCY
bool "Enable support for latency based cgroup IO protection"
depends on BLK_CGROUP=y
- default n
---help---
Enabling this option enables the .latency interface for IO throttling.
The IO controller will attempt to maintain average IO latencies below
@@ -163,7 +157,6 @@ config BLK_CGROUP_IOLATENCY
config BLK_WBT_SQ
bool "Single queue writeback throttling"
- default n
depends on BLK_WBT
---help---
Enable writeback throttling by default on legacy single queue devices
@@ -228,4 +221,7 @@ config BLK_MQ_RDMA
depends on BLOCK && INFINIBAND
default y
+config BLK_PM
+ def_bool BLOCK && PM
+
source block/Kconfig.iosched
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index a4a8914bf7a4..f95a48b0d7b2 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -36,7 +36,6 @@ config IOSCHED_CFQ
config CFQ_GROUP_IOSCHED
bool "CFQ Group Scheduling support"
depends on IOSCHED_CFQ && BLK_CGROUP
- default n
---help---
Enable group IO scheduling in CFQ.
@@ -82,7 +81,6 @@ config MQ_IOSCHED_KYBER
config IOSCHED_BFQ
tristate "BFQ I/O scheduler"
- default n
---help---
BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of
of the device among all processes according to their weights,
@@ -94,7 +92,6 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
bool "BFQ hierarchical scheduling support"
depends on IOSCHED_BFQ && BLK_CGROUP
- default n
---help---
Enable hierarchical scheduling in BFQ, using the blkio
diff --git a/block/Makefile b/block/Makefile
index 572b33f32c07..27eac600474f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
+obj-$(CONFIG_BLK_PM) += blk-pm.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 653100fb719e..3a27d31fcda6 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -624,12 +624,13 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
}
/*
- * Tell whether there are active queues or groups with differentiated weights.
+ * Tell whether there are active queues with different weights or
+ * active groups.
*/
-static bool bfq_differentiated_weights(struct bfq_data *bfqd)
+static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
{
/*
- * For weights to differ, at least one of the trees must contain
+ * For queue weights to differ, queue_weights_tree must contain
* at least two nodes.
*/
return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
@@ -637,9 +638,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
bfqd->queue_weights_tree.rb_node->rb_right)
#ifdef CONFIG_BFQ_GROUP_IOSCHED
) ||
- (!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
- (bfqd->group_weights_tree.rb_node->rb_left ||
- bfqd->group_weights_tree.rb_node->rb_right)
+ (bfqd->num_active_groups > 0
#endif
);
}
@@ -657,26 +656,25 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
* 3) all active groups at the same level in the groups tree have the same
* number of children.
*
- * Unfortunately, keeping the necessary state for evaluating exactly the
- * above symmetry conditions would be quite complex and time-consuming.
- * Therefore this function evaluates, instead, the following stronger
- * sub-conditions, for which it is much easier to maintain the needed
- * state:
+ * Unfortunately, keeping the necessary state for evaluating exactly
+ * the last two symmetry sub-conditions above would be quite complex
+ * and time consuming. Therefore this function evaluates, instead,
+ * only the following stronger two sub-conditions, for which it is
+ * much easier to maintain the needed state:
* 1) all active queues have the same weight,
- * 2) all active groups have the same weight,
- * 3) all active groups have at most one active child each.
- * In particular, the last two conditions are always true if hierarchical
- * support and the cgroups interface are not enabled, thus no state needs
- * to be maintained in this case.
+ * 2) there are no active groups.
+ * In particular, the last condition is always true if hierarchical
+ * support or the cgroups interface are not enabled, thus no state
+ * needs to be maintained in this case.
*/
static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
{
- return !bfq_differentiated_weights(bfqd);
+ return !bfq_varied_queue_weights_or_active_groups(bfqd);
}
/*
* If the weight-counter tree passed as input contains no counter for
- * the weight of the input entity, then add that counter; otherwise just
+ * the weight of the input queue, then add that counter; otherwise just
* increment the existing counter.
*
* Note that weight-counter trees contain few nodes in mostly symmetric
@@ -687,25 +685,25 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
* In most scenarios, the rate at which nodes are created/destroyed
* should be low too.
*/
-void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
+void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct rb_root *root)
{
+ struct bfq_entity *entity = &bfqq->entity;
struct rb_node **new = &(root->rb_node), *parent = NULL;
/*
- * Do not insert if the entity is already associated with a
+ * Do not insert if the queue is already associated with a
* counter, which happens if:
- * 1) the entity is associated with a queue,
- * 2) a request arrival has caused the queue to become both
+ * 1) a request arrival has caused the queue to become both
* non-weight-raised, and hence change its weight, and
* backlogged; in this respect, each of the two events
* causes an invocation of this function,
- * 3) this is the invocation of this function caused by the
+ * 2) this is the invocation of this function caused by the
* second event. This second invocation is actually useless,
* and we handle this fact by exiting immediately. More
* efficient or clearer solutions might possibly be adopted.
*/
- if (entity->weight_counter)
+ if (bfqq->weight_counter)
return;
while (*new) {
@@ -715,7 +713,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
parent = *new;
if (entity->weight == __counter->weight) {
- entity->weight_counter = __counter;
+ bfqq->weight_counter = __counter;
goto inc_counter;
}
if (entity->weight < __counter->weight)
@@ -724,66 +722,67 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
new = &((*new)->rb_right);
}
- entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
- GFP_ATOMIC);
+ bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
+ GFP_ATOMIC);
/*
* In the unlucky event of an allocation failure, we just
- * exit. This will cause the weight of entity to not be
- * considered in bfq_differentiated_weights, which, in its
- * turn, causes the scenario to be deemed wrongly symmetric in
- * case entity's weight would have been the only weight making
- * the scenario asymmetric. On the bright side, no unbalance
- * will however occur when entity becomes inactive again (the
- * invocation of this function is triggered by an activation
- * of entity). In fact, bfq_weights_tree_remove does nothing
- * if !entity->weight_counter.
+ * exit. This will cause the weight of queue to not be
+ * considered in bfq_varied_queue_weights_or_active_groups,
+ * which, in its turn, causes the scenario to be deemed
+ * wrongly symmetric in case bfqq's weight would have been
+ * the only weight making the scenario asymmetric. On the
+ * bright side, no unbalance will however occur when bfqq
+ * becomes inactive again (the invocation of this function
+ * is triggered by an activation of queue). In fact,
+ * bfq_weights_tree_remove does nothing if
+ * !bfqq->weight_counter.
*/
- if (unlikely(!entity->weight_counter))
+ if (unlikely(!bfqq->weight_counter))
return;
- entity->weight_counter->weight = entity->weight;
- rb_link_node(&entity->weight_counter->weights_node, parent, new);
- rb_insert_color(&entity->weight_counter->weights_node, root);
+ bfqq->weight_counter->weight = entity->weight;
+ rb_link_node(&bfqq->weight_counter->weights_node, parent, new);
+ rb_insert_color(&bfqq->weight_counter->weights_node, root);
inc_counter:
- entity->weight_counter->num_active++;
+ bfqq->weight_counter->num_active++;
}
/*
- * Decrement the weight counter associated with the entity, and, if the
+ * Decrement the weight counter associated with the queue, and, if the
* counter reaches 0, remove the counter from the tree.
* See the comments to the function bfq_weights_tree_add() for considerations
* about overhead.
*/
void __bfq_weights_tree_remove(struct bfq_data *bfqd,
- struct bfq_entity *entity,
+ struct bfq_queue *bfqq,
struct rb_root *root)
{
- if (!entity->weight_counter)
+ if (!bfqq->weight_counter)
return;
- entity->weight_counter->num_active--;
- if (entity->weight_counter->num_active > 0)
+ bfqq->weight_counter->num_active--;
+ if (bfqq->weight_counter->num_active > 0)
goto reset_entity_pointer;
- rb_erase(&entity->weight_counter->weights_node, root);
- kfree(entity->weight_counter);
+ rb_erase(&bfqq->weight_counter->weights_node, root);
+ kfree(bfqq->weight_counter);
reset_entity_pointer:
- entity->weight_counter = NULL;
+ bfqq->weight_counter = NULL;
}
/*
- * Invoke __bfq_weights_tree_remove on bfqq and all its inactive
- * parent entities.
+ * Invoke __bfq_weights_tree_remove on bfqq and decrement the number
+ * of active groups for each queue's inactive parent entity.
*/
void bfq_weights_tree_remove(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
struct bfq_entity *entity = bfqq->entity.parent;
- __bfq_weights_tree_remove(bfqd, &bfqq->entity,
+ __bfq_weights_tree_remove(bfqd, bfqq,
&bfqd->queue_weights_tree);
for_each_entity(entity) {
@@ -797,17 +796,13 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
* next_in_service for details on why
* in_service_entity must be checked too).
*
- * As a consequence, the weight of entity is
- * not to be removed. In addition, if entity
- * is active, then its parent entities are
- * active as well, and thus their weights are
- * not to be removed either. In the end, this
- * loop must stop here.
+ * As a consequence, its parent entities are
+ * active as well, and thus this loop must
+ * stop here.
*/
break;
}
- __bfq_weights_tree_remove(bfqd, entity,
- &bfqd->group_weights_tree);
+ bfqd->num_active_groups--;
}
}
@@ -3182,6 +3177,13 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
}
+static bool bfq_bfqq_injectable(struct bfq_queue *bfqq)
+{
+ return BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
+ blk_queue_nonrot(bfqq->bfqd->queue) &&
+ bfqq->bfqd->hw_tag;
+}
+
/**
* bfq_bfqq_expire - expire a queue.
* @bfqd: device owning the queue.
@@ -3291,6 +3293,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
if (ref == 1) /* bfqq is gone, no more actions on it */
return;
+ bfqq->injected_service = 0;
+
/* mark bfqq as waiting a request only if a bic still points to it */
if (!bfq_bfqq_busy(bfqq) &&
reason != BFQQE_BUDGET_TIMEOUT &&
@@ -3497,9 +3501,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
* symmetric scenario where:
* (i) each of these processes must get the same throughput as
* the others;
- * (ii) all these processes have the same I/O pattern
- (either sequential or random).
- * In fact, in such a scenario, the drive will tend to treat
+ * (ii) the I/O of each process has the same properties, in
+ * terms of locality (sequential or random), direction
+ * (reads or writes), request sizes, greediness
+ * (from I/O-bound to sporadic), and so on.
+ * In fact, in such a scenario, the drive tends to treat
* the requests of each of these processes in about the same
* way as the requests of the others, and thus to provide
* each of these processes with about the same throughput
@@ -3508,18 +3514,50 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
* certainly needed to guarantee that bfqq receives its
* assigned fraction of the device throughput (see [1] for
* details).
+ * The problem is that idling may significantly reduce
+ * throughput with certain combinations of types of I/O and
+ * devices. An important example is sync random I/O, on flash
+ * storage with command queueing. So, unless bfqq falls in the
+ * above cases where idling also boosts throughput, it would
+ * be important to check conditions (i) and (ii) accurately,
+ * so as to avoid idling when not strictly needed for service
+ * guarantees.
+ *
+ * Unfortunately, it is extremely difficult to thoroughly
+ * check condition (ii). And, in case there are active groups,
+ * it becomes very difficult to check condition (i) too. In
+ * fact, if there are active groups, then, for condition (i)
+ * to become false, it is enough that an active group contains
+ * more active processes or sub-groups than some other active
+ * group. We address this issue with the following bi-modal
+ * behavior, implemented in the function
+ * bfq_symmetric_scenario().
*
- * We address this issue by controlling, actually, only the
- * symmetry sub-condition (i), i.e., provided that
- * sub-condition (i) holds, idling is not performed,
- * regardless of whether sub-condition (ii) holds. In other
- * words, only if sub-condition (i) holds, then idling is
+ * If there are active groups, then the scenario is tagged as
+ * asymmetric, conservatively, without checking any of the
+ * conditions (i) and (ii). So the device is idled for bfqq.
+ * This behavior matches also the fact that groups are created
+ * exactly if controlling I/O (to preserve bandwidth and
+ * latency guarantees) is a primary concern.
+ *
+ * On the opposite end, if there are no active groups, then
+ * only condition (i) is actually controlled, i.e., provided
+ * that condition (i) holds, idling is not performed,
+ * regardless of whether condition (ii) holds. In other words,
+ * only if condition (i) does not hold, then idling is
* allowed, and the device tends to be prevented from queueing
- * many requests, possibly of several processes. The reason
- * for not controlling also sub-condition (ii) is that we
- * exploit preemption to preserve guarantees in case of
- * symmetric scenarios, even if (ii) does not hold, as
- * explained in the next two paragraphs.
+ * many requests, possibly of several processes. Since there
+ * are no active groups, then, to control condition (i) it is
+ * enough to check whether all active queues have the same
+ * weight.
+ *
+ * Not checking condition (ii) evidently exposes bfqq to the
+ * risk of getting less throughput than its fair share.
+ * However, for queues with the same weight, a further
+ * mechanism, preemption, mitigates or even eliminates this
+ * problem. And it does so without consequences on overall
+ * throughput. This mechanism and its benefits are explained
+ * in the next three paragraphs.
*
* Even if a queue, say Q, is expired when it remains idle, Q
* can still preempt the new in-service queue if the next
@@ -3533,11 +3571,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
* idling allows the internal queues of the device to contain
* many requests, and thus to reorder requests, we can rather
* safely assume that the internal scheduler still preserves a
- * minimum of mid-term fairness. The motivation for using
- * preemption instead of idling is that, by not idling,
- * service guarantees are preserved without minimally
- * sacrificing throughput. In other words, both a high
- * throughput and its desired distribution are obtained.
+ * minimum of mid-term fairness.
*
* More precisely, this preemption-based, idleless approach
* provides fairness in terms of IOPS, and not sectors per
@@ -3556,22 +3590,27 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
* 1024/8 times as high as the service received by the other
* queue.
*
- * On the other hand, device idling is performed, and thus
- * pure sector-domain guarantees are provided, for the
- * following queues, which are likely to need stronger
- * throughput guarantees: weight-raised queues, and queues
- * with a higher weight than other queues. When such queues
- * are active, sub-condition (i) is false, which triggers
- * device idling.
+ * The motivation for using preemption instead of idling (for
+ * queues with the same weight) is that, by not idling,
+ * service guarantees are preserved (completely or at least in
+ * part) without minimally sacrificing throughput. And, if
+ * there is no active group, then the primary expectation for
+ * this device is probably a high throughput.
*
- * According to the above considerations, the next variable is
- * true (only) if sub-condition (i) holds. To compute the
- * value of this variable, we not only use the return value of
- * the function bfq_symmetric_scenario(), but also check
- * whether bfqq is being weight-raised, because
- * bfq_symmetric_scenario() does not take into account also
- * weight-raised queues (see comments on
- * bfq_weights_tree_add()).
+ * We are now left only with explaining the additional
+ * compound condition that is checked below for deciding
+ * whether the scenario is asymmetric. To explain this
+ * compound condition, we need to add that the function
+ * bfq_symmetric_scenario checks the weights of only
+ * non-weight-raised queues, for efficiency reasons (see
+ * comments on bfq_weights_tree_add()). Then the fact that
+ * bfqq is weight-raised is checked explicitly here. More
+ * precisely, the compound condition below takes into account
+ * also the fact that, even if bfqq is being weight-raised,
+ * the scenario is still symmetric if all active queues happen
+ * to be weight-raised. Actually, we should be even more
+ * precise here, and differentiate between interactive weight
+ * raising and soft real-time weight raising.
*
* As a side note, it is worth considering that the above
* device-idling countermeasures may however fail in the
@@ -3583,7 +3622,8 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
* to let requests be served in the desired order until all
* the requests already queued in the device have been served.
*/
- asymmetric_scenario = bfqq->wr_coeff > 1 ||
+ asymmetric_scenario = (bfqq->wr_coeff > 1 &&
+ bfqd->wr_busy_queues < bfqd->busy_queues) ||
!bfq_symmetric_scenario(bfqd);
/*
@@ -3629,6 +3669,30 @@ static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
return RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_better_to_idle(bfqq);
}
+static struct bfq_queue *bfq_choose_bfqq_for_injection(struct bfq_data *bfqd)
+{
+ struct bfq_queue *bfqq;
+
+ /*
+ * A linear search; but, with a high probability, very few
+ * steps are needed to find a candidate queue, i.e., a queue
+ * with enough budget left for its next request. In fact:
+ * - BFQ dynamically updates the budget of every queue so as
+ * to accommodate the expected backlog of the queue;
+ * - if a queue gets all its requests dispatched as injected
+ * service, then the queue is removed from the active list
+ * (and re-added only if it gets new requests, but with
+ * enough budget for its new backlog).
+ */
+ list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list)
+ if (!RB_EMPTY_ROOT(&bfqq->sort_list) &&
+ bfq_serv_to_charge(bfqq->next_rq, bfqq) <=
+ bfq_bfqq_budget_left(bfqq))
+ return bfqq;
+
+ return NULL;
+}
+
/*
* Select a queue for service. If we have a current queue in service,
* check whether to continue servicing it, or retrieve and set a new one.
@@ -3710,10 +3774,19 @@ check_queue:
* No requests pending. However, if the in-service queue is idling
* for a new request, or has requests waiting for a completion and
* may idle after their completion, then keep it anyway.
+ *
+ * Yet, to boost throughput, inject service from other queues if
+ * possible.
*/
if (bfq_bfqq_wait_request(bfqq) ||
(bfqq->dispatched != 0 && bfq_better_to_idle(bfqq))) {
- bfqq = NULL;
+ if (bfq_bfqq_injectable(bfqq) &&
+ bfqq->injected_service * bfqq->inject_coeff <
+ bfqq->entity.service * 10)
+ bfqq = bfq_choose_bfqq_for_injection(bfqd);
+ else
+ bfqq = NULL;
+
goto keep_queue;
}
@@ -3803,6 +3876,14 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
bfq_dispatch_remove(bfqd->queue, rq);
+ if (bfqq != bfqd->in_service_queue) {
+ if (likely(bfqd->in_service_queue))
+ bfqd->in_service_queue->injected_service +=
+ bfq_serv_to_charge(rq, bfqq);
+
+ goto return_rq;
+ }
+
/*
* If weight raising has to terminate for bfqq, then next
* function causes an immediate update of bfqq's weight,
@@ -3821,13 +3902,12 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
* belongs to CLASS_IDLE and other queues are waiting for
* service.
*/
- if (bfqd->busy_queues > 1 && bfq_class_idle(bfqq))
- goto expire;
-
- return rq;
+ if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq)))
+ goto return_rq;
-expire:
bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED);
+
+return_rq:
return rq;
}
@@ -4232,6 +4312,13 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_mark_bfqq_has_short_ttime(bfqq);
bfq_mark_bfqq_sync(bfqq);
bfq_mark_bfqq_just_created(bfqq);
+ /*
+ * Aggressively inject a lot of service: up to 90%.
+ * This coefficient remains constant during bfqq life,
+ * but this behavior might be changed, after enough
+ * testing and tuning.
+ */
+ bfqq->inject_coeff = 1;
} else
bfq_clear_bfqq_sync(bfqq);
@@ -5330,7 +5417,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
bfqd->queue_weights_tree = RB_ROOT;
- bfqd->group_weights_tree = RB_ROOT;
+ bfqd->num_active_groups = 0;
INIT_LIST_HEAD(&bfqd->active_list);
INIT_LIST_HEAD(&bfqd->idle_list);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index a8a2e5aca4d4..77651d817ecd 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -108,15 +108,14 @@ struct bfq_sched_data {
};
/**
- * struct bfq_weight_counter - counter of the number of all active entities
+ * struct bfq_weight_counter - counter of the number of all active queues
* with a given weight.
*/
struct bfq_weight_counter {
- unsigned int weight; /* weight of the entities this counter refers to */
- unsigned int num_active; /* nr of active entities with this weight */
+ unsigned int weight; /* weight of the queues this counter refers to */
+ unsigned int num_active; /* nr of active queues with this weight */
/*
- * Weights tree member (see bfq_data's @queue_weights_tree and
- * @group_weights_tree)
+ * Weights tree member (see bfq_data's @queue_weights_tree)
*/
struct rb_node weights_node;
};
@@ -151,8 +150,6 @@ struct bfq_weight_counter {
struct bfq_entity {
/* service_tree member */
struct rb_node rb_node;
- /* pointer to the weight counter associated with this entity */
- struct bfq_weight_counter *weight_counter;
/*
* Flag, true if the entity is on a tree (either the active or
@@ -266,6 +263,9 @@ struct bfq_queue {
/* entity representing this queue in the scheduler */
struct bfq_entity entity;
+ /* pointer to the weight counter associated with this entity */
+ struct bfq_weight_counter *weight_counter;
+
/* maximum budget allowed from the feedback mechanism */
int max_budget;
/* budget expiration (in jiffies) */
@@ -351,6 +351,32 @@ struct bfq_queue {
unsigned long split_time; /* time of last split */
unsigned long first_IO_time; /* time of first I/O for this queue */
+
+ /* max service rate measured so far */
+ u32 max_service_rate;
+ /*
+ * Ratio between the service received by bfqq while it is in
+ * service, and the cumulative service (of requests of other
+ * queues) that may be injected while bfqq is empty but still
+ * in service. To increase precision, the coefficient is
+ * measured in tenths of unit. Here are some example of (1)
+ * ratios, (2) resulting percentages of service injected
+ * w.r.t. to the total service dispatched while bfqq is in
+ * service, and (3) corresponding values of the coefficient:
+ * 1 (50%) -> 10
+ * 2 (33%) -> 20
+ * 10 (9%) -> 100
+ * 9.9 (9%) -> 99
+ * 1.5 (40%) -> 15
+ * 0.5 (66%) -> 5
+ * 0.1 (90%) -> 1
+ *
+ * So, if the coefficient is lower than 10, then
+ * injected service is more than bfqq service.
+ */
+ unsigned int inject_coeff;
+ /* amount of service injected in current service slot */
+ unsigned int injected_service;
};
/**
@@ -423,14 +449,9 @@ struct bfq_data {
*/
struct rb_root queue_weights_tree;
/*
- * rbtree of non-queue @bfq_entity weight counters, sorted by
- * weight. Used to keep track of whether all @bfq_groups have
- * the same weight. The tree contains one counter for each
- * distinct weight associated to some active @bfq_group (see
- * the comments to the functions bfq_weights_tree_[add|remove]
- * for further details).
+ * number of groups with requests still waiting for completion
*/
- struct rb_root group_weights_tree;
+ unsigned int num_active_groups;
/*
* Number of bfq_queues containing requests (including the
@@ -825,10 +846,10 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
-void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
+void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct rb_root *root);
void __bfq_weights_tree_remove(struct bfq_data *bfqd,
- struct bfq_entity *entity,
+ struct bfq_queue *bfqq,
struct rb_root *root);
void bfq_weights_tree_remove(struct bfq_data *bfqd,
struct bfq_queue *bfqq);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index ae52bff43ce4..4b0d5fb69160 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -788,25 +788,23 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
new_weight = entity->orig_weight *
(bfqq ? bfqq->wr_coeff : 1);
/*
- * If the weight of the entity changes, remove the entity
- * from its old weight counter (if there is a counter
- * associated with the entity), and add it to the counter
<