summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <axboe@suse.de>2005-06-27 10:55:12 +0200
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-27 14:33:29 -0700
commit22e2c507c301c3dbbcf91b4948b88f78842ee6c9 (patch)
tree9a97c91d1362e69703aa286021daffb8a5456f4c
parent020f46a39eb7b99a575b9f4d105fce2b142acdf1 (diff)
[PATCH] Update cfq io scheduler to time sliced design
This updates the CFQ io scheduler to the new time sliced design (cfq v3). It provides full process fairness, while giving excellent aggregate system throughput even for many competing processes. It supports io priorities, either inherited from the cpu nice value or set directly with the ioprio_get/set syscalls. The latter closely mimic set/getpriority. This import is based on my latest from -mm. Signed-off-by: Jens Axboe <axboe@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/kernel/syscall_table.S2
-rw-r--r--arch/ia64/kernel/entry.S4
-rw-r--r--arch/ppc/kernel/misc.S2
-rw-r--r--drivers/block/as-iosched.c5
-rw-r--r--drivers/block/cfq-iosched.c1906
-rw-r--r--drivers/block/deadline-iosched.c3
-rw-r--r--drivers/block/elevator.c9
-rw-r--r--drivers/block/ll_rw_blk.c59
-rw-r--r--fs/Makefile1
-rw-r--r--fs/ioprio.c172
-rw-r--r--fs/reiserfs/journal.c12
-rw-r--r--include/asm-i386/unistd.h4
-rw-r--r--include/asm-ia64/unistd.h2
-rw-r--r--include/asm-ppc/unistd.h4
-rw-r--r--include/asm-x86_64/unistd.h6
-rw-r--r--include/linux/bio.h14
-rw-r--r--include/linux/blkdev.h25
-rw-r--r--include/linux/elevator.h8
-rw-r--r--include/linux/fs.h19
-rw-r--r--include/linux/init_task.h2
-rw-r--r--include/linux/ioprio.h87
-rw-r--r--include/linux/sched.h6
-rw-r--r--include/linux/writeback.h6
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/sched.c8
26 files changed, 1683 insertions, 690 deletions
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 442a6e937b19..3db9a04aec6e 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -289,3 +289,5 @@ ENTRY(sys_call_table)
.long sys_add_key
.long sys_request_key
.long sys_keyctl
+ .long sys_ioprio_set
+ .long sys_ioprio_get /* 290 */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index b1d5d3d5276c..785a51b0ad8e 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1577,8 +1577,8 @@ sys_call_table:
data8 sys_add_key
data8 sys_request_key
data8 sys_keyctl
- data8 sys_ni_syscall
- data8 sys_ni_syscall // 1275
+ data8 sys_ioprio_set
+ data8 sys_ioprio_get // 1275
data8 sys_set_zone_reclaim
data8 sys_ni_syscall
data8 sys_ni_syscall
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index b6a63a49a232..191a8def3bdb 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -1449,3 +1449,5 @@ _GLOBAL(sys_call_table)
.long sys_request_key /* 270 */
.long sys_keyctl
.long sys_waitid
+ .long sys_ioprio_set
+ .long sys_ioprio_get
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 3410b4d294b9..91aeb678135d 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -1806,7 +1806,8 @@ static void as_put_request(request_queue_t *q, struct request *rq)
rq->elevator_private = NULL;
}
-static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
+static int as_set_request(request_queue_t *q, struct request *rq,
+ struct bio *bio, int gfp_mask)
{
struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1827,7 +1828,7 @@ static int as_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
return 1;
}
-static int as_may_queue(request_queue_t *q, int rw)
+static int as_may_queue(request_queue_t *q, int rw, struct bio *bio)
{
int ret = ELV_MQUEUE_MAY;
struct as_data *ad = q->elevator->elevator_data;
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index 3ac47dde64da..35f6e569d5e5 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -21,22 +21,33 @@
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/mempool.h>
-
-static unsigned long max_elapsed_crq;
-static unsigned long max_elapsed_dispatch;
+#include <linux/ioprio.h>
+#include <linux/writeback.h>
/*
* tunables
*/
static int cfq_quantum = 4; /* max queue in one round of service */
static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/
-static int cfq_service = HZ; /* period over which service is avg */
-static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */
-static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */
-static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */
+static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */
static int cfq_back_penalty = 2; /* penalty of a backwards seek */
+static int cfq_slice_sync = HZ / 10;
+static int cfq_slice_async = HZ / 50;
+static int cfq_slice_async_rq = 2;
+static int cfq_slice_idle = HZ / 50;
+
+#define CFQ_IDLE_GRACE (HZ / 10)
+#define CFQ_SLICE_SCALE (5)
+
+#define CFQ_KEY_ASYNC (0)
+
+/*
+ * disable queueing at the driver/hardware level
+ */
+static int cfq_max_depth = 1;
+
/*
* for the hash of cfqq inside the cfqd
*/
@@ -55,6 +66,7 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */
#define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash)
#define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
+#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
#define RQ_DATA(rq) (rq)->elevator_private
@@ -75,78 +87,101 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */
#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq) (rq)->sector
-/*
- * threshold for switching off non-tag accounting
- */
-#define CFQ_MAX_TAG (4)
-
-/*
- * sort key types and names
- */
-enum {
- CFQ_KEY_PGID,
- CFQ_KEY_TGID,
- CFQ_KEY_UID,
- CFQ_KEY_GID,
- CFQ_KEY_LAST,
-};
-
-static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL };
-
static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
static kmem_cache_t *cfq_ioc_pool;
+#define CFQ_PRIO_LISTS IOPRIO_BE_NR
+#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE)
+#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
+
+#define cfq_cfqq_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC)
+
+/*
+ * Per block device queue structure
+ */
struct cfq_data {
- struct list_head rr_list;
+ atomic_t ref;
+ request_queue_t *queue;
+
+ /*
+ * rr list of queues with requests and the count of them
+ */
+ struct list_head rr_list[CFQ_PRIO_LISTS];
+ struct list_head busy_rr;
+ struct list_head cur_rr;
+ struct list_head idle_rr;
+ unsigned int busy_queues;
+
+ /*
+ * non-ordered list of empty cfqq's
+ */
struct list_head empty_list;
+ /*
+ * cfqq lookup hash
+ */
struct hlist_head *cfq_hash;
- struct hlist_head *crq_hash;
- /* queues on rr_list (ie they have pending requests */
- unsigned int busy_queues;
+ /*
+ * global crq hash for all queues
+ */
+ struct hlist_head *crq_hash;
unsigned int max_queued;
- atomic_t ref;
+ mempool_t *crq_pool;
- int key_type;
+ int rq_in_driver;
- mempool_t *crq_pool;
+ /*
+ * schedule slice state info
+ */
+ /*
+ * idle window management
+ */
+ struct timer_list idle_slice_timer;
+ struct work_struct unplug_work;
- request_queue_t *queue;
+ struct cfq_queue *active_queue;
+ struct cfq_io_context *active_cic;
+ int cur_prio, cur_end_prio;
+ unsigned int dispatch_slice;
+
+ struct timer_list idle_class_timer;
sector_t last_sector;
+ unsigned long last_end_request;
- int rq_in_driver;
+ unsigned int rq_starved;
/*
* tunables, see top of file
*/
unsigned int cfq_quantum;
unsigned int cfq_queued;
- unsigned int cfq_fifo_expire_r;
- unsigned int cfq_fifo_expire_w;
- unsigned int cfq_fifo_batch_expire;
+ unsigned int cfq_fifo_expire[2];
unsigned int cfq_back_penalty;
unsigned int cfq_back_max;
- unsigned int find_best_crq;
-
- unsigned int cfq_tagged;
+ unsigned int cfq_slice[2];
+ unsigned int cfq_slice_async_rq;
+ unsigned int cfq_slice_idle;
+ unsigned int cfq_max_depth;
};
+/*
+ * Per process-grouping structure
+ */
struct cfq_queue {
/* reference count */
atomic_t ref;
/* parent cfq_data */
struct cfq_data *cfqd;
- /* hash of mergeable requests */
+ /* cfqq lookup hash */
struct hlist_node cfq_hash;
/* hash key */
- unsigned long key;
- /* whether queue is on rr (or empty) list */
- int on_rr;
+ unsigned int key;
/* on either rr or empty list of cfqd */
struct list_head cfq_list;
/* sorted list of pending requests */
@@ -158,21 +193,35 @@ struct cfq_queue {
/* currently allocated requests */
int allocated[2];
/* fifo list of requests in sort_list */
- struct list_head fifo[2];
- /* last time fifo expired */
- unsigned long last_fifo_expire;
-
- int key_type;
-
- unsigned long service_start;
- unsigned long service_used;
+ struct list_head fifo;
- unsigned int max_rate;
+ unsigned long slice_start;
+ unsigned long slice_end;
+ unsigned long slice_left;
+ unsigned long service_last;
/* number of requests that have been handed to the driver */
int in_flight;
- /* number of currently allocated requests */
- int alloc_limit[2];
+
+ /* io prio of this group */
+ unsigned short ioprio, org_ioprio;
+ unsigned short ioprio_class, org_ioprio_class;
+
+ /* whether queue is on rr (or empty) list */
+ unsigned on_rr : 1;
+ /* idle slice, waiting for new request submission */
+ unsigned wait_request : 1;
+ /* set when wait_request gets set, reset on first rq alloc */
+ unsigned must_alloc : 1;
+ /* only gets one must_alloc per slice */
+ unsigned must_alloc_slice : 1;
+ /* idle slice, request added, now waiting to dispatch it */
+ unsigned must_dispatch : 1;
+ /* fifo expire per-slice */
+ unsigned fifo_expire : 1;
+
+ unsigned idle_window : 1;
+ unsigned prio_changed : 1;
};
struct cfq_rq {
@@ -184,42 +233,17 @@ struct cfq_rq {
struct cfq_queue *cfq_queue;
struct cfq_io_context *io_context;
- unsigned long service_start;
- unsigned long queue_start;
-
- unsigned int in_flight : 1;
- unsigned int accounted : 1;
- unsigned int is_sync : 1;
- unsigned int is_write : 1;
+ unsigned in_flight : 1;
+ unsigned accounted : 1;
+ unsigned is_sync : 1;
+ unsigned requeued : 1;
};
-static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long);
+static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int);
static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
-static void cfq_update_next_crq(struct cfq_rq *);
static void cfq_put_cfqd(struct cfq_data *cfqd);
-/*
- * what the fairness is based on (ie how processes are grouped and
- * differentiated)
- */
-static inline unsigned long
-cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk)
-{
- /*
- * optimize this so that ->key_type is the offset into the struct
- */
- switch (cfqd->key_type) {
- case CFQ_KEY_PGID:
- return process_group(tsk);
- default:
- case CFQ_KEY_TGID:
- return tsk->tgid;
- case CFQ_KEY_UID:
- return tsk->uid;
- case CFQ_KEY_GID:
- return tsk->gid;
- }
-}
+#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE)
/*
* lots of deadline iosched dupes, can be abstracted later...
@@ -235,16 +259,12 @@ static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
if (q->last_merge == crq->request)
q->last_merge = NULL;
-
- cfq_update_next_crq(crq);
}
static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
{
const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
- BUG_ON(!hlist_unhashed(&crq->hash));
-
hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
}
@@ -257,8 +277,6 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
struct cfq_rq *crq = list_entry_hash(entry);
struct request *__rq = crq->request;
- BUG_ON(hlist_unhashed(&crq->hash));
-
if (!rq_mergeable(__rq)) {
cfq_del_crq_hash(crq);
continue;
@@ -287,36 +305,16 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
return crq2;
if (crq2 == NULL)
return crq1;
+ if (crq1->requeued)
+ return crq1;
+ if (crq2->requeued)
+ return crq2;
s1 = crq1->request->sector;
s2 = crq2->request->sector;
last = cfqd->last_sector;
-#if 0
- if (!list_empty(&cfqd->queue->queue_head)) {
- struct list_head *entry = &cfqd->queue->queue_head;
- unsigned long distance = ~0UL;
- struct request *rq;
-
- while ((entry = entry->prev) != &cfqd->queue->queue_head) {
- rq = list_entry_rq(entry);
-
- if (blk_barrier_rq(rq))
- break;
-
- if (distance < abs(s1 - rq->sector + rq->nr_sectors)) {
- distance = abs(s1 - rq->sector +rq->nr_sectors);
- last = rq->sector + rq->nr_sectors;
- }
- if (distance < abs(s2 - rq->sector + rq->nr_sectors)) {
- distance = abs(s2 - rq->sector +rq->nr_sectors);
- last = rq->sector + rq->nr_sectors;
- }
- }
- }
-#endif
-
/*
* by definition, 1KiB is 2 sectors
*/
@@ -377,11 +375,13 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
struct rb_node *rbnext, *rbprev;
- if (!ON_RB(&last->rb_node))
- return NULL;
-
- if ((rbnext = rb_next(&last->rb_node)) == NULL)
+ if (ON_RB(&last->rb_node))
+ rbnext = rb_next(&last->rb_node);
+ else {
rbnext = rb_first(&cfqq->sort_list);
+ if (rbnext == &last->rb_node)
+ rbnext = NULL;
+ }
rbprev = rb_prev(&last->rb_node);
@@ -401,67 +401,53 @@ static void cfq_update_next_crq(struct cfq_rq *crq)
cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq);
}
-static int cfq_check_sort_rr_list(struct cfq_queue *cfqq)
+static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
{
- struct list_head *head = &cfqq->cfqd->rr_list;
- struct list_head *next, *prev;
-
- /*
- * list might still be ordered
- */
- next = cfqq->cfq_list.next;
- if (next != head) {
- struct cfq_queue *cnext = list_entry_cfqq(next);
+ struct cfq_data *cfqd = cfqq->cfqd;
+ struct list_head *list, *entry;
- if (cfqq->service_used > cnext->service_used)
- return 1;
- }
+ BUG_ON(!cfqq->on_rr);
- prev = cfqq->cfq_list.prev;
- if (prev != head) {
- struct cfq_queue *cprev = list_entry_cfqq(prev);
+ list_del(&cfqq->cfq_list);
- if (cfqq->service_used < cprev->service_used)
- return 1;
+ if (cfq_class_rt(cfqq))
+ list = &cfqd->cur_rr;
+ else if (cfq_class_idle(cfqq))
+ list = &cfqd->idle_rr;
+ else {
+ /*
+ * if cfqq has requests in flight, don't allow it to be
+ * found in cfq_set_active_queue before it has finished them.
+ * this is done to increase fairness between a process that
+ * has lots of io pending vs one that only generates one
+ * sporadically or synchronously
+ */
+ if (cfqq->in_flight)
+ list = &cfqd->busy_rr;
+ else
+ list = &cfqd->rr_list[cfqq->ioprio];
}
- return 0;
-}
-
-static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
-{
- struct list_head *entry = &cfqq->cfqd->rr_list;
-
- if (!cfqq->on_rr)
- return;
- if (!new_queue && !cfq_check_sort_rr_list(cfqq))
+ /*
+ * if queue was preempted, just add to front to be fair. busy_rr
+ * isn't sorted.
+ */
+ if (preempted || list == &cfqd->busy_rr) {
+ list_add(&cfqq->cfq_list, list);
return;
-
- list_del(&cfqq->cfq_list);
+ }
/*
- * sort by our mean service_used, sub-sort by in-flight requests
+ * sort by when queue was last serviced
*/
- while ((entry = entry->prev) != &cfqq->cfqd->rr_list) {
+ entry = list;
+ while ((entry = entry->prev) != list) {
struct cfq_queue *__cfqq = list_entry_cfqq(entry);
- if (cfqq->service_used > __cfqq->service_used)
+ if (!__cfqq->service_last)
+ break;
+ if (time_before(__cfqq->service_last, cfqq->service_last))
break;
- else if (cfqq->service_used == __cfqq->service_used) {
- struct list_head *prv;
-
- while ((prv = entry->prev) != &cfqq->cfqd->rr_list) {
- __cfqq = list_entry_cfqq(prv);
-
- WARN_ON(__cfqq->service_used > cfqq->service_used);
- if (cfqq->service_used != __cfqq->service_used)
- break;
- if (cfqq->in_flight > __cfqq->in_flight)
- break;
-
- entry = prv;
- }
- }
}
list_add(&cfqq->cfq_list, entry);
@@ -469,28 +455,24 @@ static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue)
/*
* add to busy list of queues for service, trying to be fair in ordering
- * the pending list according to requests serviced
+ * the pending list according to last request service
*/
static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
{
- /*
- * it's currently on the empty list
- */
+ BUG_ON(cfqq->on_rr);
cfqq->on_rr = 1;
cfqd->busy_queues++;
- if (time_after(jiffies, cfqq->service_start + cfq_service))
- cfqq->service_used >>= 3;
-
- cfq_sort_rr_list(cfqq, 1);
+ cfq_resort_rr_list(cfqq, requeue);
}
static inline void
cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- list_move(&cfqq->cfq_list, &cfqd->empty_list);
+ BUG_ON(!cfqq->on_rr);
cfqq->on_rr = 0;
+ list_move(&cfqq->cfq_list, &cfqd->empty_list);
BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
@@ -505,16 +487,17 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq)
if (ON_RB(&crq->rb_node)) {
struct cfq_data *cfqd = cfqq->cfqd;
+ const int sync = crq->is_sync;
- BUG_ON(!cfqq->queued[crq->is_sync]);
+ BUG_ON(!cfqq->queued[sync]);
+ cfqq->queued[sync]--;
cfq_update_next_crq(crq);
- cfqq->queued[crq->is_sync]--;
rb_erase(&crq->rb_node, &cfqq->sort_list);
RB_CLEAR_COLOR(&crq->rb_node);
- if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr)
+ if (cfqq->on_rr && RB_EMPTY(&cfqq->sort_list))
cfq_del_cfqq_rr(cfqd, cfqq);
}
}
@@ -562,7 +545,7 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
rb_insert_color(&crq->rb_node, &cfqq->sort_list);
if (!cfqq->on_rr)
- cfq_add_cfqq_rr(cfqd, cfqq);
+ cfq_add_cfqq_rr(cfqd, cfqq, crq->requeued);
/*
* check if this request is a better next-serve candidate
@@ -581,11 +564,10 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
cfq_add_crq_rb(crq);
}
-static struct request *
-cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
+static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
+
{
- const unsigned long key = cfq_hash_key(cfqd, current);
- struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key);
+ struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid);
struct rb_node *n;
if (!cfqq)
@@ -609,20 +591,23 @@ out:
static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
{
+ struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_rq *crq = RQ_DATA(rq);
if (crq) {
struct cfq_queue *cfqq = crq->cfq_queue;
- if (cfqq->cfqd->cfq_tagged) {
- cfqq->service_used--;
- cfq_sort_rr_list(cfqq, 0);
- }
-
if (crq->accounted) {
crq->accounted = 0;
- cfqq->cfqd->rq_in_driver--;
+ WARN_ON(!cfqd->rq_in_driver);
+ cfqd->rq_in_driver--;
+ }
+ if (crq->in_flight) {
+ crq->in_flight = 0;
+ WARN_ON(!cfqq->in_flight);
+ cfqq->in_flight--;
}
+ crq->requeued = 1;
}
}
@@ -640,11 +625,10 @@ static void cfq_remove_request(request_queue_t *q, struct request *rq)
struct cfq_rq *crq = RQ_DATA(rq);
if (crq) {
- cfq_remove_merge_hints(q, crq);
list_del_init(&rq->queuelist);
+ cfq_del_crq_rb(crq);
+ cfq_remove_merge_hints(q, crq);
- if (crq->cfq_queue)
- cfq_del_crq_rb(crq);
}
}
@@ -662,21 +646,15 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
}
__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
- if (__rq) {
- BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
-
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_BACK_MERGE;
- goto out;
- }
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ ret = ELEVATOR_BACK_MERGE;
+ goto out;
}
__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
- if (__rq) {
- if (elv_rq_merge_ok(__rq, bio)) {
- ret = ELEVATOR_FRONT_MERGE;
- goto out;
- }
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ ret = ELEVATOR_FRONT_MERGE;
+ goto out;
}
return ELEVATOR_NO_MERGE;
@@ -709,20 +687,194 @@ static void
cfq_merged_requests(request_queue_t *q, struct request *rq,
struct request *next)
{
- struct cfq_rq *crq = RQ_DATA(rq);
- struct cfq_rq *cnext = RQ_DATA(next);
-
cfq_merged_request(q, rq);
- if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
- if (time_before(cnext->queue_start, crq->queue_start)) {
- list_move(&rq->queuelist, &next->queuelist);
- crq->queue_start = cnext->queue_start;
+ /*
+ * reposition in fifo if next is older than rq
+ */
+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
+ time_before(next->start_time, rq->start_time))
+ list_move(&rq->queuelist, &next->queuelist);
+
+ cfq_remove_request(q, next);
+}
+
+static inline void
+__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ if (cfqq) {
+ /*
+ * stop potential idle class queues waiting service
+ */
+ del_timer(&cfqd->idle_class_timer);
+
+ cfqq->slice_start = jiffies;
+ cfqq->slice_end = 0;
+ cfqq->slice_left = 0;
+ cfqq->must_alloc_slice = 0;
+ cfqq->fifo_expire = 0;
+ }
+
+ cfqd->active_queue = cfqq;
+}
+
+/*
+ * 0
+ * 0,1
+ * 0,1,2
+ * 0,1,2,3
+ * 0,1,2,3,4
+ * 0,1,2,3,4,5
+ * 0,1,2,3,4,5,6
+ * 0,1,2,3,4,5,6,7
+ */
+static int cfq_get_next_prio_level(struct cfq_data *cfqd)
+{
+ int prio, wrap;
+
+ prio = -1;
+ wrap = 0;
+ do {
+ int p;
+
+ for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) {
+ if (!list_empty(&cfqd->rr_list[p])) {
+ prio = p;
+ break;
+ }
}
+
+ if (prio != -1)
+ break;
+ cfqd->cur_prio = 0;
+ if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+ cfqd->cur_end_prio = 0;
+ if (wrap)
+ break;
+ wrap = 1;
+ }
+ } while (1);
+
+ if (unlikely(prio == -1))
+ return -1;
+
+ BUG_ON(prio >= CFQ_PRIO_LISTS);
+
+ list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr);
+
+ cfqd->cur_prio = prio + 1;
+ if (cfqd->cur_prio > cfqd->cur_end_prio) {
+ cfqd->cur_end_prio = cfqd->cur_prio;
+ cfqd->cur_prio = 0;
+ }
+ if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) {
+ cfqd->cur_prio = 0;
+ cfqd->cur_end_prio = 0;
}
- cfq_update_next_crq(cnext);
- cfq_remove_request(q, next);
+ return prio;
+}
+
+static void cfq_set_active_queue(struct cfq_data *cfqd)
+{
+ struct cfq_queue *cfqq = NULL;
+
+ /*
+ * if current list is non-empty, grab first entry. if it is empty,
+ * get next prio level and grab first entry then if any are spliced
+ */
+ if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1)
+ cfqq = list_entry_cfqq(cfqd->cur_rr.next);
+
+ /*
+ * if we have idle queues and no rt or be queues had pending
+ * requests, either allow immediate service if the grace period
+ * has passed or arm the idle grace timer
+ */
+ if (!cfqq && !list_empty(&cfqd->idle_rr)) {
+ unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE;
+
+ if (time_after_eq(jiffies, end))
+ cfqq = list_entry_cfqq(cfqd->idle_rr.next);
+ else
+ mod_timer(&cfqd->idle_class_timer, end);
+ }
+
+ __cfq_set_active_queue(cfqd, cfqq);
+}
+
+/*
+ * current cfqq expired its slice (or was too idle), select new one
+ */
+static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted)
+{
+ struct cfq_queue *cfqq = cfqd->active_queue;
+
+ if (cfqq) {
+ unsigned long now = jiffies;
+
+ if (cfqq->wait_request)
+ del_timer(&cfqd->idle_slice_timer);
+
+ if (!preempted && !cfqq->in_flight)
+ cfqq->service_last = now;
+
+ cfqq->must_dispatch = 0;
+ cfqq->wait_request = 0;
+
+ /*
+ * store what was left of this slice, if the queue idled out
+ * or was preempted
+ */
+ if (time_after(now, cfqq->slice_end))
+ cfqq->slice_left = now - cfqq->slice_end;
+ else
+ cfqq->slice_left = 0;
+
+ if (cfqq->on_rr)
+ cfq_resort_rr_list(cfqq, preempted);
+
+ cfqd->active_queue = NULL;
+
+ if (cfqd->active_cic) {
+ put_io_context(cfqd->active_cic->ioc);
+ cfqd->active_cic = NULL;
+ }
+ }
+
+ cfqd->dispatch_slice = 0;
+}
+
+static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+
+{
+ WARN_ON(!RB_EMPTY(&cfqq->sort_list));
+ WARN_ON(cfqq != cfqd->active_queue);
+
+ /*
+ * idle is disabled, either manually or by past process history
+ */
+ if (!cfqd->cfq_slice_idle)
+ return 0;
+ if (!cfqq->idle_window)
+ return 0;
+ /*
+ * task has exited, don't wait
+ */
+ if (cfqd->active_cic && !cfqd->active_cic->ioc->task)
+ return 0;
+
+ cfqq->wait_request = 1;
+ cfqq->must_alloc = 1;
+
+ if (!timer_pending(&cfqd->idle_slice_timer)) {
+ unsigned long slice_left = cfqq->slice_end - 1;
+
+ cfqd->idle_slice_timer.expires = min(jiffies + cfqd->cfq_slice_idle, slice_left);
+ add_timer(&cfqd->idle_slice_timer);
+ }
+
+ return 1;
}
/*
@@ -738,31 +890,39 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
struct request *__rq;
sector_t last;
- cfq_del_crq_rb(crq);
- cfq_remove_merge_hints(q, crq);
list_del(&crq->request->queuelist);
last = cfqd->last_sector;
- while ((entry = entry->prev) != head) {
- __rq = list_entry_rq(entry);
+ list_for_each_entry_reverse(__rq, head, queuelist) {
+ struct cfq_rq *__crq = RQ_DATA(__rq);
- if (blk_barrier_rq(crq->request))
+ if (blk_barrier_rq(__rq))
break;
- if (!blk_fs_request(crq->request))
+ if (!blk_fs_request(__rq))
+ break;
+ if (__crq->requeued)
break;
- if (crq->request->sector > __rq->sector)
+ if (__rq->sector <= crq->request->sector)
break;
if (__rq->sector > last && crq->request->sector < last) {
- last = crq->request->sector;
+ last = crq->request->sector + crq->request->nr_sectors;
break;
}
+ entry = &__rq->queuelist;
}
cfqd->last_sector = last;
+
+ cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
+
+ cfq_del_crq_rb(crq);
+ cfq_remove_merge_hints(q, crq);
+
crq->in_flight = 1;
+ crq->requeued = 0;
cfqq->in_flight++;
- list_add(&crq->request->queuelist, entry);
+ list_add_tail(&crq->request->queuelist, entry);
}
/*
@@ -771,105 +931,176 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq)
{
struct cfq_data *cfqd = cfqq->cfqd;
- const int reads = !list_empty(&cfqq->fifo[0]);
- const int writes = !list_empty(&cfqq->fifo[1]);
- unsigned long now = jiffies;
+ struct request *rq;
struct cfq_rq *crq;
- if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire))
+ if (cfqq->fifo_expire)
return NULL;