summary | refs | log | tree | commit | diff | stats
path: root/drivers/block
diff options
context:
space:
mode:
author: Jens Axboe <axboe@suse.de> 2005-06-27 10:55:12 +0200
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-27 14:33:29 -0700
commit: 22e2c507c301c3dbbcf91b4948b88f78842ee6c9 (patch)
tree: 9a97c91d1362e69703aa286021daffb8a5456f4c /drivers/block
parent: 020f46a39eb7b99a575b9f4d105fce2b142acdf1 (diff)
[PATCH] Update cfq io scheduler to time sliced design
This updates the CFQ io scheduler to the new time sliced design (cfq v3). It provides full process fairness, while giving excellent aggregate system throughput even for many competing processes. It supports io priorities, either inherited from the cpu nice value or set directly with the ioprio_get/set syscalls. The latter closely mimic set/getpriority. This import is based on my latest from -mm.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/as-iosched.c | 5
-rw-r--r-- drivers/block/cfq-iosched.c | 1906
-rw-r--r-- drivers/block/deadline-iosched.c | 3
-rw-r--r-- drivers/block/elevator.c | 9
-rw-r--r-- drivers/block/ll_rw_blk.c | 59
5 files changed, 1320 insertions, 662 deletions
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 3410b4d294b9..91aeb678135d 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -1806,7 +1806,8 @@ static void as_put_request(request_queue_t *q, struct request *rq)
rq->elevator_private = NULL;
}
-static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+static int as_set_request(request_queue_t *q, struct request *rq,
+ struct bio *bio, int gfp_mask)
{
struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1827,7 +1828,7 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
return 1;
}
-static int as_may_queue(request_queue_t *q, int rw)
+static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
int ret = ELV_MQUEUE_MAY;
struct as_data *ad = q->elevator->elevator_data;
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index 3ac47dde64da..35f6e569d5e5 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -21,22 +21,33 @@
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/mempool.h>
-
-static unsigned long max_elapsed_crq;
-static unsigned long max_elapsed_dispatch;
+#include <linux/ioprio.h>
+#include <linux/writeback.h>
/*
* tunables
*/
static int cfq_quantum = 4; /* max queue in one round of service */
static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
-static int cfq_service = HZ; /* period over which service is avg */
-static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */
-static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */
-static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */
+static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
static int cfq_back_penalty = 2; /* penalty of a backwards seek */
+static int cfq_slice_sync = HZ / 10;
+static int cfq_slice_async = HZ / 50;
+static int cfq_slice_async_rq = 2;
+static int cfq_slice_idle = HZ / 50;
+
+#define CFQ_IDLE_GRACE (HZ / 10)
+#define CFQ_SLICE_SCALE (5)
+
+#define CFQ_KEY_ASYNC (0)
+
+/*
+ * disable queueing at the driver/hardware level
+ */
+static int cfq_max_depth = 1;
+
/*
* for the hash of cfqq inside the cfqd
*/
@@ -55,6 +66,7 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */
#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
+#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
#define RQ_DATA(rq) (rq)->elevator_private
@@ -75,78 +87,101 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */
#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq) (rq)->sector
-/*
- * threshold for switching off non-tag accounting
- */
-#define CFQ_MAX_TAG (4)
-
-/*
- * sort key types and names
- */
-enum {
- CFQ_KEY_PGID,
- CFQ_KEY_TGID,
- CFQ_KEY_UID,
- CFQ_KEY_GID,
- CFQ_KEY_LAST,
-};
-
-static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
-
static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
static kmem_cache_t *cfq_ioc_pool;
+#define CFQ_PRIO_LISTS IOPRIO_BE_NR
+#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
+#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
+
+#define cfq_cfqq_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC)
+
+/*
+ * Per block device queue structure
+ */
struct cfq_data {
- struct list_head rr_list;
+ atomic_t ref;
+ request_queue_t *queue;
+
+ /*
+ * rr list of queues with requests and the count of them
+ */
+ struct list_head rr_list[CFQ_PRIO_LISTS];
+ struct list_head busy_rr;
+ struct list_head cur_rr;
+ struct list_head idle_rr;
+ unsigned int busy_queues;
+
+ /*
+ * non-ordered list of empty cfqq's
+ */
struct list_head empty_list;
+ /*
+ * cfqq lookup hash
+ */
struct hlist_head *cfq_hash;
- struct hlist_head *crq_hash;
- /* queues on rr_list (ie they have pending requests */
- unsigned int busy_queues;
+ /*
+ * global crq hash for all queues
+ */
+ struct hlist_head *crq_hash;
unsigned int max_queued;
- atomic_t ref;
+ mempool_t *crq_pool;
- int key_type;
+ int rq_in_driver;
- mempool_t *crq_pool;
+ /*
+ * schedule slice state info
+ */
+ /*
+ * idle window management
+ */
+ struct timer_list idle_slice_timer;
+ struct work_struct unplug_work;
- request_queue_t *queue;
+ struct cfq_queue *active_queue;
+ struct cfq_io_context *active_cic;
+ int cur_prio, cur_end_prio;
+ unsigned int dispatch_slice;
+
+ struct timer_list idle_class_timer;
sector_t last_sector;
+ unsigned long last_end_request;
- int rq_in_driver;
+ unsigned int rq_starved;
/*
* tunables, see top of file
*/
unsigned int cfq_quantum;
unsigned int cfq_queued;
- unsigned int cfq_fifo_expire_r;
- unsigned int cfq_fifo_expire_w;
- unsigned int cfq_fifo_batch_expire;
+ unsigned int cfq_fifo_expire[2];
unsigned int cfq_back_penalty;
unsigned int cfq_back_max;
- unsigned int find_best_crq;
-
- unsigned int cfq_tagged;
+ unsigned int cfq_slice[2];
+ unsigned int cfq_slice_async_rq;
+ unsigned int cfq_slice_idle;
+ unsigned int cfq_max_depth;
};
+/*
+ * Per process-grouping structure
+ */
struct cfq_queue {
/* reference count */
atomic_t ref;
/* parent cfq_data */
struct cfq_data *cfqd;
- /* hash of mergeable requests */
+ /* cfqq lookup hash */
struct hlist_node cfq_hash;
/* hash key */
- unsigned long key;
- /* whether queue is on rr (or empty) list */
- int on_rr;
+ unsigned int key;
/* on either rr or empty list of cfqd */
struct list_head cfq_list;
/* sorted list of pending requests */
@@ -158,21 +193,35 @@ struct cfq_queue {
/* currently allocated requests */
int allocated[2];
/* fifo list of requests in sort_list */
- struct list_head fifo[2];
- /* last time fifo expired */
- unsigned long last_fifo_expire;
-
- int key_type;
-
- unsigned long service_start;
- unsigned long service_used;
+ struct list_head fifo;
- unsigned int max_rate;
+ unsigned long slice_start;
+ unsigned long slice_end;
+ unsigned long slice_left;
+ unsigned long service_last;
/* number of requests that have been handed to the driver */
int in_flight;
- /* number of currently allocated requests */
- int alloc_limit[2];
+
+ /* io prio of this group */
+ unsigned short ioprio, org_ioprio;
+ unsigned short ioprio_class, org_ioprio_class;
+
+ /* whether queue is on rr (or empty) list */
+ unsigned on_rr : 1;
+ /* idle slice, waiting for new request submission */
+ unsigned wait_request : 1;
+ /* set when wait_request gets set, reset on first rq alloc */
+ unsigned must_alloc : 1;
+ /* only gets one must_alloc per slice */
+ unsigned must_alloc_slice : 1;
+ /* idle slice, request added, now waiting to dispatch it */
+ unsigned must_dispatch : 1;
+ /* fifo expire per-slice */
+ unsigned fifo_expire : 1;
+
+ unsigned idle_window : 1;
+ unsigned prio_changed : 1;
};
struct cfq_rq {
@@ -184,42 +233,17 @@ struct cfq_rq {
struct cfq_queue *cfq_queue;
struct cfq_io_context *io_context;
- unsigned long service_start;
- unsigned long queue_start;
-
- unsigned int in_flight : 1;
- unsigned int accounted : 1;
- unsigned int is_sync : 1;
- unsigned int is_write : 1;
+ unsigned in_flight : 1;
+ unsigned accounted : 1;
+ unsigned is_sync : 1;
+ unsigned requeued : 1;
};
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
+static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int);
static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
-static void cfq_update_next_crq(struct cfq_rq *);
static void cfq_put_cfqd(struct cfq_data *cfqd);
-/*
- * what the fairness is based on (ie how processes are grouped and
- * differentiated)
- */
-static inline unsigned long
-cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk)
-{
- /*
- * optimize this so that ->key_type is the offset into the struct
- */
- switch (cfqd->key_type) {
- case CFQ_KEY_PGID:
- return process_group(tsk);
- default:
- case CFQ_KEY_TGID:
- return tsk->tgid;
- case CFQ_KEY_UID:
- return tsk->uid;
- case CFQ_KEY_GID:
- return tsk->gid;
- }
-}
+#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE)
/*
* lots of deadline iosched dupes, can be abstracted later...
@@ -235,16 +259,12 @@ static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
if (q->last_merge == crq->request)
q->last_merge = NULL;
-
- cfq_update_next_crq(crq);
}
static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
{
const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
- BUG_ON(!hlist_unhashed(&crq->hash));
-
hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
}
@@ -257,8 +277,6 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
struct cfq_rq *crq = list_entry_hash(entry);
struct request *__rq = crq->request;
- BUG_ON(hlist_unhashed(&crq->hash));
-
if (!rq_mergeable(__rq)) {
cfq_del_crq_hash(crq);
continue;
@@ -287,36 +305,16 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
return crq2;
if (crq2 == NULL)
return crq1;
+ if (crq1->requeued)
+ return crq1;
+ if (crq2->requeued)
+ return crq2;
s1 = crq1->request->sector;
s2 = crq2->request->sector;
last = cfqd->last_sector;
-#if 0
- if (!list_empty(&cfqd->queue->queue_head)) {
- struct list_head *entry = &cfqd->queue->queue_head;
- unsigned long distance = ~0UL;
- struct request *rq;
-
- while ((entry = entry->prev) != &cfqd->queue->queue_head) {
- rq = list_entry_rq(entry);
-
- if (blk_barrier_rq(rq))
- break;
-
- if (distance < abs(s1 - rq->sector + rq->nr_sectors)) {
- distance = abs(s1 - rq->sector +rq->nr_sectors);
- last = rq->sector + rq->nr_sectors;
- }
- if (distance < abs(s2 - rq->sector + rq->nr_sectors)) {
- distance = abs(s2 - rq->sector +rq->nr_sectors);
- last = rq->sector + rq->nr_sectors;
- }
- }
- }
-#endif
-
/*
* by definition, 1KiB is 2 sectors
*/
@@ -377,11 +375,13 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
struct rb_node *rbnext, *rbprev;
- if (!ON_RB(&last->rb_node))
- return NULL;
-
- if ((rbnext = rb_next(&last->rb_node)) == NULL)
+ if (ON_RB(&last->rb_node))
+ rbnext = rb_next(&last->rb_node);
+ else {
rbnext = rb_first(&cfqq->sort_list);
+ if (rbnext == &last->rb_node)
+ rbnext = NULL;
+ }
rbprev = rb_prev(&last->rb_node);
@@ -401,67 +401,53 @@ static void cfq_update_next_crq(struct cfq_rq *crq)
cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
}
-static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
+static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
{
- struct list_head *head = &cfqq->cfqd->rr_list;
- struct list_head *next, *prev;
-
- /*
- * list might still be ordered
- */
- next = cfqq->cfq_list.next;
- if (next != head) {
- struct cfq_queue *cnext = list_entry_cfqq(next);
+ struct cfq_data *cfqd = cfqq->cfqd;
+ struct list_head *list, *entry;
- if (cfqq->service_used > cnext->service_used)
- return 1;
- }
+ BUG_ON(!cfqq->on_rr);
- prev = cfqq->cfq_list.prev;
- if (prev != head) {
- struct cfq_queue *cprev = list_entry_cfqq(prev);
+ list_del(&cfqq->cfq_list);
- if (cfqq->service_used < cprev->service_used)
- return 1;
+ if (cfq_class_rt(cfqq))
+ list = &cfqd->cur_rr;
+ else if (cfq_class_idle(cfqq))
+ list = &cfqd->idle_rr;
+ else {
+ /*
+ * if cfqq has requests in flight, don't allow it to be
+ * found in cfq_set_active_queue before it has finished them.
+ * this is done to increase fairness between a process that
+ * has lots of io pending vs one that only generates one
+ * sporadically or synchronously
+ */
+ if (cfqq->in_flight)
+ list = &cfqd->busy_rr;
+ else
+ list = &cfqd->rr_list[cfqq->ioprio];
}
- return 0;
-}
-
-static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
-{
- struct list_head *entry = &cfqq->cfqd->rr_list;
-
- if (!cfqq->on_rr)
- return;
- if (!new_queue && !cfq_check_sort_rr_list(cfqq))
+ /*
+ * if queue was preempted, just add to front to be fair. busy_rr
+ * isn't sorted.
+ */
+ if (preempted || list == &cfqd->busy_rr) {
+ list_add(&cfqq->cfq_list, list);
return;
-
- list_del(&cfqq->cfq_list);
+ }
/*
- * sort by our mean service_used, sub-sort by in-flight requests
+ * sort by when queue was last serviced
*/
- while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
+ entry = list;
+ while ((entry = entry->prev) != list) {
struct cfq_queue *__cfqq = list_entry_cfqq(entry);
- if (cfqq->service_used > __cfqq->service_used)
+ if (!__cfqq->service_last)
+ break;
+ if (time_before(__cfqq->service_last, cfqq->service_last))
break;
- else if (cfqq->service_used == __cfqq->service_used) {
- struct list_head *prv;
-
- while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
- __cfqq = list_entry_cfqq(prv);
-
- WARN_ON(__cfqq->service_used > cfqq->service_used);
- if (cfqq->service_used != __cfqq->service_used)
- break;
- if (cfqq->in_flight > __cfqq->in_flight)
- break;
-
- entry = prv;
- }
- }
}
list_add(&cfqq->cfq_list, entry);
@@ -469,28 +455,24 @@ static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
/*
* add to busy list of queues for service, trying to be fair in ordering
- * the pending list according to requests serviced
+ * the pending list according to last request service
*/
static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
{
- /*
- * it's currently on the empty list
- */
+ BUG_ON(cfqq->on_rr);
cfqq->on_rr = 1;
cfqd->busy_queues++;
- if (time_after(jiffies, cfqq->service_start + cfq_service))
- cfqq->service_used >>= 3;
-
- cfq_sort_rr_list(cfqq, 1);
+ cfq_resort_rr_list(cfqq, requeue);
}
static inline void
cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- list_move(&cfqq->cfq_list, &cfqd->empty_list);
+ BUG_ON(!cfqq->on_rr);
cfqq->on_rr = 0;
+ list_move(&cfqq->cfq_list, &cfqd->empty_list);
BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
@@ -505,16 +487,17 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq)
if (ON_RB(&crq->rb_node)) {
struct cfq_data *cfqd = cfqq->cfqd;
+ const int sync = crq->is_sync;
- BUG_ON(!cfqq->queued[crq->is_sync]);
+ BUG_ON(!cfqq->queued[sync]);
+ cfqq->queued[sync]--;
cfq_update_next_crq(crq);
- cfqq->queued[crq->is_sync]--;
rb_erase(&crq->rb_node, &cfqq->sort_list);
RB_CLEAR_COLOR(&crq->rb_node);
- if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr)
+ if (cfqq->on_rr && RB_EMPTY(&cfqq->sort_list))
cfq_del_cfqq_rr(cfqd, cfqq);
}
}
@@ -562,7 +545,7 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
rb_insert_color(&crq->rb_node, &cfqq->sort_list);
if (!cfqq->on_rr)
- cfq_add_cfqq_rr(cfqd, cfqq);
+ cfq_add_cfqq_rr(cfqd, cfqq, crq->requeued);
/*
* check if this request is a better next-serve candidate
@@ -581,11 +564,10 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
cfq_add_crq_rb(crq);
}
-static struct request *
-cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
+static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
+
{
- const unsigned long key = cfq_hash_key(cfqd, current);
- struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key);
+ struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid);
struct rb_node *n;
if (!cfqq)
@@ -609,20 +591,23 @@ out:
static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
{
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_rq *crq = RQ_DATA(rq);
if (crq) {
struct cfq_queue *cfqq = crq->cfq_queue;
- if (cfqq->cfqd->cfq_tagged) {
- cfqq->service_used--;
- cfq_sort_rr_list(cfqq, 0);
- }
-
if (crq->accounted) {
crq->accounted = 0;
- cfqq->cfqd->rq_in_driver--;
+ WARN_ON(!cfqd->rq_in_driver);
+ cfqd->rq_in_driver--;
+ }
+ if (crq->in_flight) {
+ crq->in_flight = 0;
+ WARN_ON(!cfqq->in_flight);
+ cfqq->in_flight--;
}
+ crq->requeued = 1;
}
}
@@ -640,11 +625,10 @@ static void cfq_remove_request(request_queue_t *q, struct request *rq)
struct cfq_rq *crq = RQ_DATA(rq);
if (crq) {
- cfq_remove_merge_hints(q, crq);
list_del_init(&rq->queuelist);
+ cfq_del_crq_rb(crq);
+ cfq_remove_merge_hints(q, crq);
- if (crq->cfq_queue)
- cfq_del_crq_rb(crq);
}
}
@@ -662,21 +646,15 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
}
__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
- if (__rq) {
- BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_BACK_MERGE;
- goto out;
- }
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ ret = ELEVATOR_BACK_MERGE;
+ goto out;
}
__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
- if (__rq) {
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_FRONT_MERGE;
- goto out;
- }
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ ret = ELEVATOR_FRONT_MERGE;
+ goto out;
}
return ELEVATOR_NO_MERGE;
@@ -709,20 +687,194 @@ static void
cfq_merged_requests(request_queue_t *q, struct request *rq,
struct request *next)
{
- struct cfq_rq *crq = RQ_DATA(rq);
- struct cfq_rq *cnext = RQ_DATA(next);
-
cfq_merged_request(q, rq);
- if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
- if (time_before(cnext->queue_start, crq->queue_start)) {
- list_move(&rq->queuelist, &next->queuelist);
- crq->queue_start = cnext->queue_start;
+ /*
+ * reposition in fifo if next is older than rq
+ */
+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+ time_before(next->start_time, rq->start_time))
+ list_move(&rq->queuelist, &next->queuelist);
+
+ cfq_remove_request(q, next);
+}
+
+static inline void
+__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ if (cfqq) {
+ /*
+ * stop potential idle class queues waiting service
+ */
+ del_timer(&cfqd->idle_class_timer);
+
+ cfqq->slice_start = jiffies;
+ cfqq->slice_end = 0;
+ cfqq->slice_left = 0;
+ cfqq->must_alloc_slice = 0;
+ cfqq->fifo_expire = 0;
+ }
+
+ cfqd->active_queue = cfqq;
+}
+
+/*
+ * 0
+ * 0,1
+ * 0,1,2
+ * 0,1,2,3
+ * 0,1,2,3,4
+ * 0,1,2,3,4,5
+ * 0,1,2,3,4,5,6
+ * 0,1,2,3,4,5,6,7
+ */
+static int cfq_get_next_prio_level(struct cfq_data *cfqd)
+{
+ int prio, wrap;
+
+ prio = -1;
+ wrap = 0;
+ do {
+ int p;
+
+ for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
+ if (!list_empty(&cfqd->rr_list[p])) {
+ prio = p;
+ break;
+ }
}
+
+ if (prio != -1)
+ break;
+ cfqd->cur_prio = 0;
+ if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+ cfqd->cur_end_prio = 0;
+ if (wrap)
+ break;
+ wrap = 1;
+ }
+ } while (1);
+
+ if (unlikely(prio == -1))
+ return -1;
+
+ BUG_ON(prio >= CFQ_PRIO_LISTS);
+
+ list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
+
+ cfqd->cur_prio = prio + 1;
+ if (cfqd->cur_prio > cfqd->cur_end_prio) {
+ cfqd->cur_end_prio = cfqd->cur_prio;
+ cfqd->cur_prio = 0;
+ }
+ if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+ cfqd->cur_prio = 0;
+ cfqd->cur_end_prio = 0;
}
- cfq_update_next_crq(cnext);
- cfq_remove_request(q, next);
+ return prio;
+}
+
+static void cfq_set_active_queue(struct cfq_data *cfqd)
+{
+ struct cfq_queue *cfqq = NULL;
+
+ /*
+ * if current list is non-empty, grab first entry. if it is empty,
+ * get next prio level and grab first entry then if any are spliced
+ */
+ if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
+ cfqq = list_entry_cfqq(cfqd->cur_rr.next);
+
+ /*
+ * if we have idle queues and no rt or be queues had pending
+ * requests, either allow immediate service if the grace period
+ * has passed or arm the idle grace timer
+ */
+ if (!cfqq && !list_empty(&cfqd->idle_rr)) {
+ unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
+
+ if (time_after_eq(jiffies, end))
+ cfqq = list_entry_cfqq(cfqd->idle_rr.next);
+ else
+ mod_timer(&cfqd->idle_class_timer, end);
+ }
+
+ __cfq_set_active_queue(cfqd, cfqq);
+}
+
+/*
+ * current cfqq expired its slice (or was too idle), select new one
+ */
+static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
+{
+ struct cfq_queue *cfqq = cfqd->active_queue;
+
+ if (cfqq) {
+ unsigned long now = jiffies;
+
+ if (cfqq->wait_request)
+ del_timer(&cfqd->idle_slice_timer);
+
+ if (!preempted && !cfqq->in_flight)
+ cfqq->service_last = now;
+
+ cfqq->must_dispatch = 0;
+ cfqq->wait_request = 0;
+
+ /*
+ * store what was left of this slice, if the queue idled out
+ * or was preempted
+ */
+ if (time_after(now, cfqq->slice_end))
+ cfqq->slice_left = now - cfqq->slice_end;
+ else
+ cfqq->slice_left = 0;
+
+ if (cfqq->on_rr)
+ cfq_resort_rr_list(cfqq, preempted);
+
+ cfqd->active_queue = NULL;
+
+ if (cfqd->active_cic) {
+ put_io_context(cfqd->active_cic->ioc);
+ cfqd->active_cic = NULL;
+ }
+ }
+
+ cfqd->dispatch_slice = 0;
+}
+
+static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+
+{
+ WARN_ON(!RB_EMPTY(&cfqq->sort_list));
+ WARN_ON(cfqq != cfqd->active_queue);
+
+ /*
+ * idle is disabled, either manually or by past process history
+ */
+ if (!cfqd->cfq_slice_idle)
+ return 0;
+ if (!cfqq->idle_window)
+ return 0;
+ /*
+ * task has exited, don't wait
+ */
+ if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
+ return 0;
+
+ cfqq->wait_request = 1;
+ cfqq->must_alloc = 1;
+
+ if (!timer_pending(&cfqd->idle_slice_timer)) {
+ unsigned long slice_left = cfqq->slice_end - 1;
+
+ cfqd->idle_slice_timer.expires = min(jiffies + cfqd->cfq_slice_idle, slice_left);
+ add_timer(&cfqd->idle_slice_timer);
+ }
+
+ return 1;
}
/*
@@ -738,31 +890,39 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
struct request *__rq;
sector_t last;
- cfq_del_crq_rb(crq);
- cfq_remove_merge_hints(q, crq);
list_del(&crq->request->queuelist);
last = cfqd->last_sector;
- while ((entry = entry->prev) != head) {
- __rq = list_entry_rq(entry);
+ list_for_each_entry_reverse(__rq, head, queuelist) {
+ struct cfq_rq *__crq = RQ_DATA(__rq);
- if (blk_barrier_rq(crq->request))
+ if (blk_barrier_rq(__rq))
break;
- if (!blk_fs_request(crq->request))
+ if (!blk_fs_request(__rq))
+ break;
+ if (__crq->requeued)
break;
- if (crq->request->sector > __rq->sector)
+ if (__rq->sector <= crq->request->sector)
break;
if (__rq->sector > last && crq->request->sector < last) {
- last = crq->request->sector;
+ last = crq->request->sector + crq->request->nr_sectors;
break;
}
+ entry = &__rq->queuelist;
}
cfqd->last_sector = last;
+
+ cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
+
+ cfq_del_crq_rb(crq);
+ cfq_remove_merge_hints(q, crq);
+
crq->in_flight = 1;
+ crq->requeued = 0;
cfqq->in_flight++;
- list_add(&crq->request->queuelist, entry);
+ list_add_tail(&crq->request->queuelist, entry);
}
/*
@@ -771,105 +931,176 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
{
struct cfq_data *cfqd = cfqq->cfqd;
- const int reads = !list_empty(&cfqq->fifo[0]);
- const int writes = !list_empty(&cfqq->fifo[1]);
- unsigned long now = jiffies;
+ struct request *rq;
struct cfq_rq *crq;
- if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
+ if (cfqq->fifo_expire)
return NULL;
- crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist));
- if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) {
- cfqq->last_fifo_expire = now;
- return crq;
- }
+ if (!list_empty(&cfqq->fifo)) {
+ int fifo = cfq_cfqq_sync(cfqq);
- crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist));
- if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) {
- cfqq->last_fifo_expire = now;
- return crq;
+ crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next));
+ rq = crq->request;
+ if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) {
+ cfqq->fifo_expire = 1;
+ return crq;
+ }
}
return NULL;
}
/*
- * dispatch a single request from given queue
+ * Scale schedule slice based on io priority
*/
+static inline int
+cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)];
+
+ WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
+
+ return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio));
+}
+
static inline void
-cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
+cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- struct cfq_rq *crq;
+ cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+}
+
+static inline int
+cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ const int base_rq = cfqd->cfq_slice_async_rq;
+
+ WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR);
+
+ return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio));
+}
+
+/*
+ * get next queue for service
+ */
+static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force)
+{
+ unsigned long now = jiffies;
+ struct cfq_queue *cfqq;
+
+ cfqq = cfqd->active_queue;
+ if (!cfqq)
+ goto new_queue;
/*
- * follow expired path, else get first next available
+ * slice has expired
*/
- if ((crq = cfq_check_fifo(cfqq)) == NULL) {
- if (cfqd->find_best_crq)
- crq = cfqq->next_crq;
- else
- crq = rb_entry_crq(rb_first(&cfqq->sort_list));
- }
-
- cfqd->last_sector = crq->request->sector + crq->request->nr_sectors;
+ if (!cfqq->must_dispatch && time_after(jiffies, cfqq->slice_end))
+ goto new_queue;
/*
- * finally, insert request into driver list
+ * if queue has requests, dispatch one. if not, check if
+ * enough slice is left to wait for one
*/
- cfq_dispatch_sort(q, crq);
+ if (!RB_EMPTY(&cfqq->sort_list))
+ goto keep_queue;
+ else if (!force && cfq_cfqq_sync(cfqq) &&
+ time_before(now, cfqq->slice_end)) {
+ if (cfq_arm_slice_timer(cfqd, cfqq))
+ return NULL;
+ }
+
+new_queue:
+ cfq_slice_expired(cfqd, 0);
+ cfq_set_active_queue(cfqd);
+keep_queue:
+ return cfqd->active_queue;
}
-static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch)
+static int
+__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ int max_dispatch)
{
- struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_queue *cfqq;
- struct list_head *entry, *tmp;
- int queued, busy_queues, first_round;
+ int dispatched = 0;
- if (list_empty(&cfqd->rr_list))
- return 0;
+ BUG_ON(RB_EMPTY(&cfqq->sort_list));
- queued = 0;
- first_round = 1;
-restart:
- busy_queues = 0;
- list_for_each_safe(entry, tmp, &cfqd->rr_list) {
- cfqq = list_entry_cfqq(entry);
+ do {
+ struct cfq_rq *crq;
- BUG_ON(RB_EMPTY(&cfqq->sort_list));
+ /*
+ * follow expired path, else get first next available
+ */
+ if ((crq = cfq_check_fifo(cfqq)) == NULL)
+ crq = cfqq->next_crq;
/*
- * first round of queueing, only select from queues that
- * don't already have io in-flight
+ * finally, insert request into driver dispatch list
*/
- if (first_round && cfqq->in_flight)
- continue;
+ cfq_dispatch_sort(cfqd->queue, crq);
- cfq_dispatch_request(q, cfqd, cfqq);
+ cfqd->dispatch_slice++;
+ dispatched++;
- if (!RB_EMPTY(&cfqq->sort_list))
- busy_queues++;
+ if (!cfqd->active_cic) {
+ atomic_inc(&crq->io_context->ioc->refcount);
+ cfqd->active_cic = crq->io_context;
+ }
- queued++;
- }
+ if (RB_EMPTY(&cfqq->sort_list))
+ break;
+
+ } while (dispatched < max_dispatch);
+
+ /*
+ * if slice end isn't set yet, set it. if at least one request was
+ * sync, use the sync time slice value
+ */
+ if (!cfqq->slice_end)
+ cfq_set_prio_slice(cfqd, cfqq);
+
+ /*
+ * expire an async queue immediately if it has used up its slice. idle
+ * queue always expire after 1 dispatch round.
+ */
+ if ((!cfq_cfqq_sync(cfqq) &&
+ cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
+ cfq_class_idle(cfqq))
+ cfq_slice_expired(cfqd, 0);
+
+ return dispatched;
+}
+
+static int
+cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
+{
+ struct cfq_data *cfqd = q->elevator->elevator_data;
+ struct cfq_queue *cfqq;
- if ((queued < max_dispatch) && (busy_queues || first_round)) {
- first_round = 0;
- goto restart;
+ if (!cfqd->busy_queues)
+ return 0;
+
+ cfqq = cfq_select_queue(cfqd, force);
+ if (cfqq) {
+ cfqq->wait_request = 0;
+ cfqq->must_dispatch = 0;
+ del_timer(&cfqd->idle_slice_timer);
+
+ if (cfq_class_idle(cfqq))
+ max_dispatch = 1;
+
+ return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
}
- return queued;
+ return 0;
}
static inline void cfq_account_dispatch(struct cfq_rq *crq)
{
struct cfq_queue *cfqq = crq->cfq_queue;
struct cfq_data *cfqd = cfqq->cfqd;
- unsigned long now, elapsed;
- if (!blk_fs_request(crq->request))
+ if (unlikely(!blk_fs_request(crq->request)))
return;
/*
@@ -879,65 +1110,34 @@ static inline void cfq_account_dispatch(struct cfq_rq *crq)
if (crq->accounted)
return;
- now = jiffies;
- if (cfqq->service_start == ~0UL)
- cfqq->service_start = now;
-
- /*
- * on drives with tagged command queueing, command turn-around time
- * doesn't nece