From 3cca6dc1c81e2407928dc4c6105252146fd3924f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Mar 2011 11:08:00 -0500 Subject: block: add API for delaying work/request_fn a little bit Currently we use plugging for that, but as plugging is going away, we need an alternative mechanism. Signed-off-by: Jens Axboe --- block/blk-core.c | 29 +++++++++++++++++++++++++++++ include/linux/blkdev.h | 6 ++++++ 2 files changed, 35 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 3cc17e6064d6..e958c7a1e462 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -197,6 +197,32 @@ void blk_dump_rq_flags(struct request *rq, char *msg) } EXPORT_SYMBOL(blk_dump_rq_flags); +static void blk_delay_work(struct work_struct *work) +{ + struct request_queue *q; + + q = container_of(work, struct request_queue, delay_work.work); + spin_lock_irq(q->queue_lock); + q->request_fn(q); + spin_unlock_irq(q->queue_lock); +} + +/** + * blk_delay_queue - restart queueing after defined interval + * @q: The &struct request_queue in question + * @msecs: Delay in msecs + * + * Description: + * Sometimes queueing needs to be postponed for a little while, to allow + * resources to come back. This function will make sure that queueing is + * restarted around the specified time. + */ +void blk_delay_queue(struct request_queue *q, unsigned long msecs) +{ + schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs)); +} +EXPORT_SYMBOL(blk_delay_queue); + /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests @@ -363,6 +389,7 @@ EXPORT_SYMBOL(blk_start_queue); void blk_stop_queue(struct request_queue *q) { blk_remove_plug(q); + cancel_delayed_work(&q->delay_work); queue_flag_set(QUEUE_FLAG_STOPPED, q); } EXPORT_SYMBOL(blk_stop_queue); @@ -387,6 +414,7 @@ void blk_sync_queue(struct request_queue *q) del_timer_sync(&q->timeout); cancel_work_sync(&q->unplug_work); throtl_shutdown_timer_wq(q); + cancel_delayed_work_sync(&q->delay_work); } EXPORT_SYMBOL(blk_sync_queue); @@ -534,6 +562,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) INIT_LIST_HEAD(&q->flush_queue[1]); INIT_LIST_HEAD(&q->flush_data_in_flight); INIT_WORK(&q->unplug_work, blk_unplug_work); + INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); kobject_init(&q->kobj, &blk_queue_ktype); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e3ee74fc5903..f55b2a8b6610 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -300,6 +300,11 @@ struct request_queue unsigned long unplug_delay; /* After this many jiffies */ struct work_struct unplug_work; + /* + * Delayed queue handling + */ + struct delayed_work delay_work; + struct backing_dev_info backing_dev_info; /* @@ -677,6 +682,7 @@ extern int blk_insert_cloned_request(struct request_queue *q, extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); +extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, unsigned int, void __user *); -- cgit v1.2.3 From 0a41e90bb7c931fd53d73ea770f5b251af6c91ce Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Apr 2010 21:11:21 +0200 Subject: ide-cd: convert to blk_delay_queue() for a short pause It was always abuse to reuse the plugging infrastructure for this, 
convert it to the (new) real API for delaying queueing a bit. Signed-off-by: Jens Axboe Acked-by: David S. Miller --- drivers/ide/ide-cd.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 0c73fe39a236..7ce9caf4521b 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -258,17 +258,10 @@ static int ide_cd_breathe(ide_drive_t *drive, struct request *rq) if (time_after(jiffies, info->write_timeout)) return 0; else { - struct request_queue *q = drive->queue; - unsigned long flags; - /* - * take a breather relying on the unplug timer to kick us again + * take a breather */ - - spin_lock_irqsave(q->queue_lock, flags); - blk_plug_device(q); - spin_unlock_irqrestore(q->queue_lock, flags); - + blk_delay_queue(drive->queue, 1); return 1; } } @@ -1514,8 +1507,6 @@ static int ide_cdrom_setup(ide_drive_t *drive) blk_queue_dma_alignment(q, 31); blk_queue_update_dma_pad(q, 15); - q->unplug_delay = max((1 * HZ) / 1000, 1); - drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED; drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id); -- cgit v1.2.3 From a488e74976bf0a9bccecdd094378394942dacef1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Apr 2010 21:13:15 +0200 Subject: scsi: convert to blk_delay_queue() It was always abuse to reuse the plugging infrastructure for this, convert it to the (new) real API for delaying queueing a bit. A default delay of 3 msec is defined, to match the previous behaviour. Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++------------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9045c52abd25..5a0ae7a944ce 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -67,6 +67,13 @@ static struct scsi_host_sg_pool scsi_sg_pools[] = { struct kmem_cache *scsi_sdb_cache; +/* + * When to reinvoke queueing after a resource shortage. It's 3 msecs to + * not change behaviour from the previous unplug mechanism, experimentation + * may prove this needs changing. + */ +#define SCSI_QUEUE_DELAY 3 + static void scsi_run_queue(struct request_queue *q); /* @@ -149,14 +156,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy) /* * Requeue this command. It will go before all other commands * that are already in the queue. - * - * NOTE: there is magic here about the way the queue is plugged if - * we have no outstanding commands. - * - * Although we *don't* plug the queue, we call the request - * function. The SCSI request function detects the blocked condition - * and plugs the queue appropriately. - */ + */ spin_lock_irqsave(q->queue_lock, flags); blk_requeue_request(q, cmd->request); spin_unlock_irqrestore(q->queue_lock, flags); @@ -1194,11 +1194,11 @@ int scsi_prep_return(struct request_queue *q, struct request *req, int ret) case BLKPREP_DEFER: /* * If we defer, the blk_peek_request() returns NULL, but the - * queue must be restarted, so we plug here if no returning - * command will automatically do that. + * queue must be restarted, so we schedule a callback to happen + * shortly. 
*/ if (sdev->device_busy == 0) - blk_plug_device(q); + blk_delay_queue(q, SCSI_QUEUE_DELAY); break; default: req->cmd_flags |= REQ_DONTPREP; @@ -1237,7 +1237,7 @@ static inline int scsi_dev_queue_ready(struct request_queue *q, sdev_printk(KERN_INFO, sdev, "unblocking device at zero depth\n")); } else { - blk_plug_device(q); + blk_delay_queue(q, SCSI_QUEUE_DELAY); return 0; } } @@ -1467,7 +1467,7 @@ static void scsi_request_fn(struct request_queue *q) * the host is no longer able to accept any more requests. */ shost = sdev->host; - while (!blk_queue_plugged(q)) { + for (;;) { int rtn; /* * get next queueable request. We do this early to make sure @@ -1546,15 +1546,8 @@ static void scsi_request_fn(struct request_queue *q) */ rtn = scsi_dispatch_cmd(cmd); spin_lock_irq(q->queue_lock); - if(rtn) { - /* we're refusing the command; because of - * the way locks get dropped, we need to - * check here if plugging is required */ - if(sdev->device_busy == 0) - blk_plug_device(q); - - break; - } + if (rtn) + goto out_delay; } goto out; @@ -1573,9 +1566,10 @@ static void scsi_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); blk_requeue_request(q, req); sdev->device_busy--; - if(sdev->device_busy == 0) - blk_plug_device(q); - out: +out_delay: + if (sdev->device_busy == 0) + blk_delay_queue(q, SCSI_QUEUE_DELAY); +out: /* must be careful here...if we trigger the ->remove() function * we cannot be holding the q lock */ spin_unlock_irq(q->queue_lock); -- cgit v1.2.3 From 73c101011926c5832e6e141682180c4debe2cf45 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Mar 2011 13:19:51 +0100 Subject: block: initial patch for on-stack per-task plugging This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler. The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event. The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm. 
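As a rough sketch of how a submitter uses the new API (illustrative only; more_io() and submit_one_bio() are placeholders for the caller's own submission loop):

        struct blk_plug plug;

        blk_start_plug(&plug);
        while (more_io())
                submit_one_bio();       /* held on current->plug, merged there if possible */
        blk_finish_plug(&plug);         /* batch is flushed to the queue(s) here */

Should the task block while the plug is active, the scheduler flushes the pending IO itself, so a task cannot deadlock waiting on requests still sitting in its own plug.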
Signed-off-by: Jens Axboe --- block/blk-core.c | 369 ++++++++++++++++++++++++++++++++++------------ block/blk-flush.c | 3 +- block/elevator.c | 6 +- include/linux/blk_types.h | 2 + include/linux/blkdev.h | 42 ++++++ include/linux/elevator.h | 1 + include/linux/sched.h | 6 + kernel/exit.c | 1 + kernel/fork.c | 3 + kernel/sched.c | 12 ++ 10 files changed, 344 insertions(+), 101 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index e958c7a1e462..6efb55cc5af0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -27,6 +27,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -203,7 +204,7 @@ static void blk_delay_work(struct work_struct *work) q = container_of(work, struct request_queue, delay_work.work); spin_lock_irq(q->queue_lock); - q->request_fn(q); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); } @@ -686,6 +687,8 @@ int blk_get_queue(struct request_queue *q) static inline void blk_free_request(struct request_queue *q, struct request *rq) { + BUG_ON(rq->cmd_flags & REQ_ON_PLUG); + if (rq->cmd_flags & REQ_ELVPRIV) elv_put_request(q, rq); mempool_free(rq, q->rq.rq_pool); @@ -1051,6 +1054,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL(blk_requeue_request); +static void add_acct_request(struct request_queue *q, struct request *rq, + int where) +{ + drive_stat_acct(rq, 1); + __elv_add_request(q, rq, where, 0); +} + /** * blk_insert_request - insert a special request into a request queue * @q: request queue where request should be inserted @@ -1093,8 +1103,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); - drive_stat_acct(rq, 1); - __elv_add_request(q, rq, where, 0); + add_acct_request(q, rq, where); __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -1215,6 +1224,113 @@ void blk_add_request_payload(struct request *rq, struct page *page, } EXPORT_SYMBOL_GPL(blk_add_request_payload); +static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, + struct bio *bio) +{ + const int ff = bio->bi_rw & REQ_FAILFAST_MASK; + + /* + * Debug stuff, kill later + */ + if (!rq_mergeable(req)) { + blk_dump_rq_flags(req, "back"); + return false; + } + + if (!ll_back_merge_fn(q, req, bio)) + return false; + + trace_block_bio_backmerge(q, bio); + + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) + blk_rq_set_mixed_merge(req); + + req->biotail->bi_next = bio; + req->biotail = bio; + req->__data_len += bio->bi_size; + req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); + + drive_stat_acct(req, 0); + return true; +} + +static bool bio_attempt_front_merge(struct request_queue *q, + struct request *req, struct bio *bio) +{ + const int ff = bio->bi_rw & REQ_FAILFAST_MASK; + sector_t sector; + + /* + * Debug stuff, kill later + */ + if (!rq_mergeable(req)) { + blk_dump_rq_flags(req, "front"); + return false; + } + + if (!ll_front_merge_fn(q, req, bio)) + return false; + + trace_block_bio_frontmerge(q, bio); + + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) + blk_rq_set_mixed_merge(req); + + sector = bio->bi_sector; + + bio->bi_next = req->bio; + req->bio = bio; + + /* + * may not be valid. if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... 
+ */ + req->buffer = bio_data(bio); + req->__sector = bio->bi_sector; + req->__data_len += bio->bi_size; + req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); + + drive_stat_acct(req, 0); + return true; +} + +/* + * Attempts to merge with the plugged list in the current process. Returns + * true if merge was succesful, otherwise false. + */ +static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, + struct bio *bio) +{ + struct blk_plug *plug; + struct request *rq; + bool ret = false; + + plug = tsk->plug; + if (!plug) + goto out; + + list_for_each_entry_reverse(rq, &plug->list, queuelist) { + int el_ret; + + if (rq->q != q) + continue; + + el_ret = elv_try_merge(rq, bio); + if (el_ret == ELEVATOR_BACK_MERGE) { + ret = bio_attempt_back_merge(q, rq, bio); + if (ret) + break; + } else if (el_ret == ELEVATOR_FRONT_MERGE) { + ret = bio_attempt_front_merge(q, rq, bio); + if (ret) + break; + } + } +out: + return ret; +} + void init_request_from_bio(struct request *req, struct bio *bio) { req->cpu = bio->bi_comp_cpu; @@ -1230,26 +1346,12 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -/* - * Only disabling plugging for non-rotational devices if it does tagging - * as well, otherwise we do need the proper merging - */ -static inline bool queue_should_plug(struct request_queue *q) -{ - return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); -} - static int __make_request(struct request_queue *q, struct bio *bio) { - struct request *req; - int el_ret; - unsigned int bytes = bio->bi_size; - const unsigned short prio = bio_prio(bio); const bool sync = !!(bio->bi_rw & REQ_SYNC); - const bool unplug = !!(bio->bi_rw & REQ_UNPLUG); - const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK; - int where = ELEVATOR_INSERT_SORT; - int rw_flags; + struct blk_plug *plug; + int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; + struct request *req; /* * low level driver can indicate that it wants pages above a @@ -1258,78 +1360,36 @@ static int __make_request(struct request_queue *q, struct bio *bio) */ blk_queue_bounce(q, &bio); - spin_lock_irq(q->queue_lock); - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { + spin_lock_irq(q->queue_lock); where = ELEVATOR_INSERT_FLUSH; goto get_rq; } - if (elv_queue_empty(q)) - goto get_rq; - - el_ret = elv_merge(q, &req, bio); - switch (el_ret) { - case ELEVATOR_BACK_MERGE: - BUG_ON(!rq_mergeable(req)); - - if (!ll_back_merge_fn(q, req, bio)) - break; - - trace_block_bio_backmerge(q, bio); - - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) - blk_rq_set_mixed_merge(req); - - req->biotail->bi_next = bio; - req->biotail = bio; - req->__data_len += bytes; - req->ioprio = ioprio_best(req->ioprio, prio); - if (!blk_rq_cpu_valid(req)) - req->cpu = bio->bi_comp_cpu; - drive_stat_acct(req, 0); - elv_bio_merged(q, req, bio); - if (!attempt_back_merge(q, req)) - elv_merged_request(q, req, el_ret); + /* + * Check if we can merge with the plugged list before grabbing + * any locks. 
+ */ + if (attempt_plug_merge(current, q, bio)) goto out; - case ELEVATOR_FRONT_MERGE: - BUG_ON(!rq_mergeable(req)); - - if (!ll_front_merge_fn(q, req, bio)) - break; - - trace_block_bio_frontmerge(q, bio); + spin_lock_irq(q->queue_lock); - if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { - blk_rq_set_mixed_merge(req); - req->cmd_flags &= ~REQ_FAILFAST_MASK; - req->cmd_flags |= ff; + el_ret = elv_merge(q, &req, bio); + if (el_ret == ELEVATOR_BACK_MERGE) { + BUG_ON(req->cmd_flags & REQ_ON_PLUG); + if (bio_attempt_back_merge(q, req, bio)) { + if (!attempt_back_merge(q, req)) + elv_merged_request(q, req, el_ret); + goto out_unlock; + } + } else if (el_ret == ELEVATOR_FRONT_MERGE) { + BUG_ON(req->cmd_flags & REQ_ON_PLUG); + if (bio_attempt_front_merge(q, req, bio)) { + if (!attempt_front_merge(q, req)) + elv_merged_request(q, req, el_ret); + goto out_unlock; } - - bio->bi_next = req->bio; - req->bio = bio; - - /* - * may not be valid. if the low level driver said - * it didn't need a bounce buffer then it better - * not touch req->buffer either... - */ - req->buffer = bio_data(bio); - req->__sector = bio->bi_sector; - req->__data_len += bytes; - req->ioprio = ioprio_best(req->ioprio, prio); - if (!blk_rq_cpu_valid(req)) - req->cpu = bio->bi_comp_cpu; - drive_stat_acct(req, 0); - elv_bio_merged(q, req, bio); - if (!attempt_front_merge(q, req)) - elv_merged_request(q, req, el_ret); - goto out; - - /* ELV_NO_MERGE: elevator says don't/can't merge. */ - default: - ; } get_rq: @@ -1356,20 +1416,35 @@ get_rq: */ init_request_from_bio(req, bio); - spin_lock_irq(q->queue_lock); if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || - bio_flagged(bio, BIO_CPU_AFFINE)) - req->cpu = blk_cpu_to_group(smp_processor_id()); - if (queue_should_plug(q) && elv_queue_empty(q)) - blk_plug_device(q); - - /* insert the request into the elevator */ - drive_stat_acct(req, 1); - __elv_add_request(q, req, where, 0); + bio_flagged(bio, BIO_CPU_AFFINE)) { + req->cpu = blk_cpu_to_group(get_cpu()); + put_cpu(); + } + + plug = current->plug; + if (plug && !sync) { + if (!plug->should_sort && !list_empty(&plug->list)) { + struct request *__rq; + + __rq = list_entry_rq(plug->list.prev); + if (__rq->q != q) + plug->should_sort = 1; + } + /* + * Debug flag, kill later + */ + req->cmd_flags |= REQ_ON_PLUG; + list_add_tail(&req->queuelist, &plug->list); + drive_stat_acct(req, 1); + } else { + spin_lock_irq(q->queue_lock); + add_acct_request(q, req, where); + __blk_run_queue(q); +out_unlock: + spin_unlock_irq(q->queue_lock); + } out: - if (unplug || !queue_should_plug(q)) - __generic_unplug_device(q); - spin_unlock_irq(q->queue_lock); return 0; } @@ -1772,9 +1847,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - drive_stat_acct(rq, 1); - __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); - + add_acct_request(q, rq, ELEVATOR_INSERT_BACK); spin_unlock_irqrestore(q->queue_lock, flags); return 0; @@ -2659,6 +2732,106 @@ int kblockd_schedule_delayed_work(struct request_queue *q, } EXPORT_SYMBOL(kblockd_schedule_delayed_work); +#define PLUG_MAGIC 0x91827364 + +void blk_start_plug(struct blk_plug *plug) +{ + struct task_struct *tsk = current; + + plug->magic = PLUG_MAGIC; + INIT_LIST_HEAD(&plug->list); + plug->should_sort = 0; + + /* + * If this is a nested plug, don't actually assign it. It will be + * flushed on its own. 
+ */ + if (!tsk->plug) { + /* + * Store ordering should not be needed here, since a potential + * preempt will imply a full memory barrier + */ + tsk->plug = plug; + } +} +EXPORT_SYMBOL(blk_start_plug); + +static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct request *rqa = container_of(a, struct request, queuelist); + struct request *rqb = container_of(b, struct request, queuelist); + + return !(rqa->q == rqb->q); +} + +static void flush_plug_list(struct blk_plug *plug) +{ + struct request_queue *q; + unsigned long flags; + struct request *rq; + + BUG_ON(plug->magic != PLUG_MAGIC); + + if (list_empty(&plug->list)) + return; + + if (plug->should_sort) + list_sort(NULL, &plug->list, plug_rq_cmp); + + q = NULL; + local_irq_save(flags); + while (!list_empty(&plug->list)) { + rq = list_entry_rq(plug->list.next); + list_del_init(&rq->queuelist); + BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); + BUG_ON(!rq->q); + if (rq->q != q) { + if (q) { + __blk_run_queue(q); + spin_unlock(q->queue_lock); + } + q = rq->q; + spin_lock(q->queue_lock); + } + rq->cmd_flags &= ~REQ_ON_PLUG; + + /* + * rq is already accounted, so use raw insert + */ + __elv_add_request(q, rq, ELEVATOR_INSERT_SORT, 0); + } + + if (q) { + __blk_run_queue(q); + spin_unlock(q->queue_lock); + } + + BUG_ON(!list_empty(&plug->list)); + local_irq_restore(flags); +} + +static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug) +{ + flush_plug_list(plug); + + if (plug == tsk->plug) + tsk->plug = NULL; +} + +void blk_finish_plug(struct blk_plug *plug) +{ + if (plug) + __blk_finish_plug(current, plug); +} +EXPORT_SYMBOL(blk_finish_plug); + +void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug) +{ + __blk_finish_plug(tsk, plug); + tsk->plug = plug; +} +EXPORT_SYMBOL(__blk_flush_plug); + int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/blk-flush.c b/block/blk-flush.c index a867e3f524f3..1e2aa8a8908c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -264,10 +264,9 @@ static bool blk_kick_flush(struct request_queue *q) static void flush_data_end_io(struct request *rq, int error) { struct request_queue *q = rq->q; - bool was_empty = elv_queue_empty(q); /* after populating an empty queue, kick it to avoid stall */ - if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty) + if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) __blk_run_queue(q); } diff --git a/block/elevator.c b/block/elevator.c index f98e92edc937..25713927c0d3 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) } EXPORT_SYMBOL(elv_rq_merge_ok); -static inline int elv_try_merge(struct request *__rq, struct bio *bio) +int elv_try_merge(struct request *__rq, struct bio *bio) { int ret = ELEVATOR_NO_MERGE; @@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) struct list_head *entry; int stop_flags; + BUG_ON(rq->cmd_flags & REQ_ON_PLUG); + if (q->last_merge == rq) q->last_merge = NULL; @@ -696,6 +698,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { + BUG_ON(rq->cmd_flags & REQ_ON_PLUG); + if (rq->cmd_flags & REQ_SOFTBARRIER) { /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dddedfc0af81..16b286473042 100644 --- 
a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -152,6 +152,7 @@ enum rq_flag_bits { __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_ON_PLUG, /* on plug list */ __REQ_NR_BITS, /* stops here */ }; @@ -193,5 +194,6 @@ enum rq_flag_bits { #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_SECURE (1 << __REQ_SECURE) +#define REQ_ON_PLUG (1 << __REQ_ON_PLUG) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f55b2a8b6610..5873037eeb91 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +struct blk_plug { + unsigned long magic; + struct list_head list; + unsigned int should_sort; +}; + +extern void blk_start_plug(struct blk_plug *); +extern void blk_finish_plug(struct blk_plug *); +extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); + +static inline void blk_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (unlikely(plug)) + __blk_flush_plug(tsk, plug); +} + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + return plug && !list_empty(&plug->list); +} + /* * tag stuff */ @@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void) return 0; } +static inline void blk_start_plug(struct list_head *list) +{ +} + +static inline void blk_finish_plug(struct list_head *list) +{ +} + +static inline void blk_flush_plug(struct task_struct *tsk) +{ +} + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + return false; +} + #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 39b68edb388d..8857cf9adbb7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -105,6 +105,7 @@ extern void elv_add_request(struct request_queue *, struct request *, int, int); extern void __elv_add_request(struct request_queue *, struct request *, int, int); extern void elv_insert(struct request_queue *, struct request *, int); extern int elv_merge(struct request_queue *, struct request **, struct bio *); +extern int elv_try_merge(struct request *, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, int); diff --git a/include/linux/sched.h b/include/linux/sched.h index 777d8a5ed06b..96ac22643742 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -99,6 +99,7 @@ struct robust_list_head; struct bio_list; struct fs_struct; struct perf_event_context; +struct blk_plug; /* * List of flags we want to share for kernel threads, @@ -1429,6 +1430,11 @@ struct task_struct { /* stacked block device info */ struct bio_list *bio_list; +#ifdef CONFIG_BLOCK +/* stack plugging */ + struct blk_plug *plug; +#endif + /* VM state */ struct reclaim_state *reclaim_state; diff --git a/kernel/exit.c b/kernel/exit.c index f9a45ebcc7b1..6a488ad2dce5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code) profile_task_exit(tsk); WARN_ON(atomic_read(&tsk->fs_excl)); + WARN_ON(blk_needs_flush_plug(tsk)); if (unlikely(in_interrupt())) panic("Aiee, killing 
interrupt handler!"); diff --git a/kernel/fork.c b/kernel/fork.c index 25e429152ddc..027c80e5162f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1204,6 +1204,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; +#ifdef CONFIG_BLOCK + p->plug = NULL; +#endif #ifdef CONFIG_FUTEX p->robust_list = NULL; #ifdef CONFIG_COMPAT diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4ec7ba..ca098bf4cc65 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3978,6 +3978,16 @@ need_resched_nonpreemptible: switch_count = &prev->nvcsw; } + /* + * If we are going to sleep and we have plugged IO queued, make + * sure to submit it to avoid deadlocks. + */ + if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) { + raw_spin_unlock(&rq->lock); + blk_flush_plug(prev); + raw_spin_lock(&rq->lock); + } + pre_schedule(rq, prev); if (unlikely(!rq->nr_running)) @@ -5333,6 +5343,7 @@ void __sched io_schedule(void) delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); + blk_flush_plug(current); current->in_iowait = 1; schedule(); current->in_iowait = 0; @@ -5348,6 +5359,7 @@ long __sched io_schedule_timeout(long timeout) delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); + blk_flush_plug(current); current->in_iowait = 1; ret = schedule_timeout(timeout); current->in_iowait = 0; -- cgit v1.2.3 From 7eaceaccab5f40bbfda044629a6298616aeaed50 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2011 08:52:07 +0100 Subject: block: remove per-queue plugging Code has been converted over to the new explicit on-stack plugging, and delay users have been converted to use the new API for that. So let's kill off the old plugging along with aops->sync_page().
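To illustrate what the conversion means for a typical submit-and-wait path (a sketch only; bio and page setup are omitted, and blk_run_address_space() is one of the old kick-the-queue helpers removed by this patch):

        /* old, implicit scheme: the queue plugged itself on the first
         * request, and the caller had to kick it before waiting */
        submit_bio(READ, bio);
        blk_run_address_space(mapping);
        wait_on_page_locked(page);

        /* new, explicit scheme: the plug belongs to the caller and is
         * flushed by blk_finish_plug(), or automatically on schedule */
        struct blk_plug plug;

        blk_start_plug(&plug);
        submit_bio(READ, bio);
        blk_finish_plug(&plug);
        wait_on_page_locked(page);

The ->sync_page() address_space op existed only to make that implicit kick work from the vm side, which is why it can go away together with the queue plug.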
Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 5 -- block/blk-core.c | 173 +++++------------------------------- block/blk-exec.c | 4 +- block/blk-flush.c | 3 +- block/blk-settings.c | 8 -- block/blk-throttle.c | 1 - block/blk.h | 2 - block/cfq-iosched.c | 8 -- block/deadline-iosched.c | 9 -- block/elevator.c | 43 +-------- block/noop-iosched.c | 8 -- drivers/block/cciss.c | 6 -- drivers/block/cpqarray.c | 3 - drivers/block/drbd/drbd_actlog.c | 2 - drivers/block/drbd/drbd_bitmap.c | 1 - drivers/block/drbd/drbd_int.h | 14 --- drivers/block/drbd/drbd_main.c | 33 +------ drivers/block/drbd/drbd_receiver.c | 20 +---- drivers/block/drbd/drbd_req.c | 4 - drivers/block/drbd/drbd_worker.c | 1 - drivers/block/drbd/drbd_wrappers.h | 18 ---- drivers/block/floppy.c | 1 - drivers/block/loop.c | 13 --- drivers/block/pktcdvd.c | 2 - drivers/block/umem.c | 16 +--- drivers/ide/ide-atapi.c | 3 +- drivers/ide/ide-io.c | 4 - drivers/ide/ide-park.c | 2 +- drivers/md/bitmap.c | 3 +- drivers/md/dm-crypt.c | 9 +- drivers/md/dm-kcopyd.c | 52 ++--------- drivers/md/dm-raid.c | 2 +- drivers/md/dm-raid1.c | 2 - drivers/md/dm-table.c | 24 ----- drivers/md/dm.c | 33 ++----- drivers/md/linear.c | 17 ---- drivers/md/md.c | 7 -- drivers/md/multipath.c | 31 ------- drivers/md/raid0.c | 16 ---- drivers/md/raid1.c | 83 ++++------------- drivers/md/raid10.c | 87 ++++-------------- drivers/md/raid5.c | 62 ++----------- drivers/md/raid5.h | 2 +- drivers/message/i2o/i2o_block.c | 6 +- drivers/mmc/card/queue.c | 3 +- drivers/s390/block/dasd.c | 2 +- drivers/s390/char/tape_block.c | 1 - drivers/scsi/scsi_transport_fc.c | 2 +- drivers/scsi/scsi_transport_sas.c | 6 +- drivers/target/target_core_iblock.c | 7 +- fs/adfs/inode.c | 1 - fs/affs/file.c | 2 - fs/aio.c | 4 +- fs/befs/linuxvfs.c | 1 - fs/bfs/file.c | 1 - fs/block_dev.c | 1 - fs/btrfs/disk-io.c | 79 ---------------- fs/btrfs/inode.c | 1 - fs/btrfs/volumes.c | 91 +++---------------- fs/buffer.c | 31 +------ fs/cifs/file.c | 30 ------- fs/direct-io.c | 5 +- fs/efs/inode.c | 1 - fs/exofs/inode.c | 1 - fs/ext2/inode.c | 2 - fs/ext3/inode.c | 3 - fs/ext4/inode.c | 4 - fs/fat/inode.c | 1 - fs/freevxfs/vxfs_subr.c | 1 - fs/fuse/inode.c | 1 - fs/gfs2/aops.c | 3 - fs/gfs2/meta_io.c | 1 - fs/hfs/inode.c | 2 - fs/hfsplus/inode.c | 2 - fs/hpfs/file.c | 1 - fs/isofs/inode.c | 1 - fs/jfs/inode.c | 1 - fs/jfs/jfs_metapage.c | 1 - fs/logfs/dev_bdev.c | 2 - fs/minix/inode.c | 1 - fs/nilfs2/btnode.c | 6 +- fs/nilfs2/gcinode.c | 1 - fs/nilfs2/inode.c | 1 - fs/nilfs2/mdt.c | 9 +- fs/nilfs2/page.c | 5 +- fs/nilfs2/page.h | 3 +- fs/ntfs/aops.c | 4 - fs/ntfs/compress.c | 3 +- fs/ocfs2/aops.c | 1 - fs/ocfs2/cluster/heartbeat.c | 4 - fs/omfs/file.c | 1 - fs/qnx4/inode.c | 1 - fs/reiserfs/inode.c | 1 - fs/sysv/itree.c | 1 - fs/ubifs/super.c | 1 - fs/udf/file.c | 1 - fs/udf/inode.c | 1 - fs/ufs/inode.c | 1 - fs/ufs/truncate.c | 2 +- fs/xfs/linux-2.6/xfs_aops.c | 1 - fs/xfs/linux-2.6/xfs_buf.c | 13 ++- include/linux/backing-dev.h | 16 ---- include/linux/blkdev.h | 31 ++----- include/linux/buffer_head.h | 1 - include/linux/device-mapper.h | 5 -- include/linux/elevator.h | 7 +- include/linux/fs.h | 1 - include/linux/pagemap.h | 12 --- include/linux/swap.h | 2 - mm/backing-dev.c | 6 -- mm/filemap.c | 67 ++------------ mm/memory-failure.c | 8 +- mm/nommu.c | 4 - mm/page-writeback.c | 2 +- mm/readahead.c | 12 --- mm/shmem.c | 1 - mm/swap_state.c | 5 +- mm/swapfile.c | 37 -------- mm/vmscan.c | 2 +- 119 files changed, 151 insertions(+), 1269 deletions(-) diff --git 
a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index b9a83dd24732..2a7b38c832c7 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -963,11 +963,6 @@ elevator_dispatch_fn* fills the dispatch queue with ready requests. elevator_add_req_fn* called to add a new request into the scheduler -elevator_queue_empty_fn returns true if the merge queue is empty. - Drivers shouldn't use this, but rather check - if elv_next_request is NULL (without losing the - request if one exists!) - elevator_former_req_fn elevator_latter_req_fn These return the request before or after the one specified in disk sort order. Used by the diff --git a/block/blk-core.c b/block/blk-core.c index 6efb55cc5af0..82a45898ba76 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -198,6 +198,19 @@ void blk_dump_rq_flags(struct request *rq, char *msg) } EXPORT_SYMBOL(blk_dump_rq_flags); +/* + * Make sure that plugs that were pending when this function was entered, + * are now complete and requests pushed to the queue. +*/ +static inline void queue_sync_plugs(struct request_queue *q) +{ + /* + * If the current process is plugged and has barriers submitted, + * we will livelock if we don't unplug first. + */ + blk_flush_plug(current); +} + static void blk_delay_work(struct work_struct *work) { struct request_queue *q; @@ -224,137 +237,6 @@ void blk_delay_queue(struct request_queue *q, unsigned long msecs) } EXPORT_SYMBOL(blk_delay_queue); -/* - * "plug" the device if there are no outstanding requests: this will - * force the transfer to start only after we have put all the requests - * on the list. - * - * This is called with interrupts off and no requests on the queue and - * with the queue lock held. - */ -void blk_plug_device(struct request_queue *q) -{ - WARN_ON(!irqs_disabled()); - - /* - * don't plug a stopped queue, it must be paired with blk_start_queue() - * which will restart the queueing - */ - if (blk_queue_stopped(q)) - return; - - if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { - mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); - trace_block_plug(q); - } -} -EXPORT_SYMBOL(blk_plug_device); - -/** - * blk_plug_device_unlocked - plug a device without queue lock held - * @q: The &struct request_queue to plug - * - * Description: - * Like @blk_plug_device(), but grabs the queue lock and disables - * interrupts. - **/ -void blk_plug_device_unlocked(struct request_queue *q) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_plug_device(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} -EXPORT_SYMBOL(blk_plug_device_unlocked); - -/* - * remove the queue from the plugged list, if present. called with - * queue lock held and interrupts disabled. - */ -int blk_remove_plug(struct request_queue *q) -{ - WARN_ON(!irqs_disabled()); - - if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) - return 0; - - del_timer(&q->unplug_timer); - return 1; -} -EXPORT_SYMBOL(blk_remove_plug); - -/* - * remove the plug and let it rip.. - */ -void __generic_unplug_device(struct request_queue *q) -{ - if (unlikely(blk_queue_stopped(q))) - return; - if (!blk_remove_plug(q) && !blk_queue_nonrot(q)) - return; - - q->request_fn(q); -} - -/** - * generic_unplug_device - fire a request queue - * @q: The &struct request_queue in question - * - * Description: - * Linux uses plugging to build bigger requests queues before letting - * the device have at them. If a queue is plugged, the I/O scheduler - * is still adding and merging requests on the queue. 
Once the queue - * gets unplugged, the request_fn defined for the queue is invoked and - * transfers started. - **/ -void generic_unplug_device(struct request_queue *q) -{ - if (blk_queue_plugged(q)) { - spin_lock_irq(q->queue_lock); - __generic_unplug_device(q); - spin_unlock_irq(q->queue_lock); - } -} -EXPORT_SYMBOL(generic_unplug_device); - -static void blk_backing_dev_unplug(struct backing_dev_info *bdi, - struct page *page) -{ - struct request_queue *q = bdi->unplug_io_data; - - blk_unplug(q); -} - -void blk_unplug_work(struct work_struct *work) -{ - struct request_queue *q = - container_of(work, struct request_queue, unplug_work); - - trace_block_unplug_io(q); - q->unplug_fn(q); -} - -void blk_unplug_timeout(unsigned long data) -{ - struct request_queue *q = (struct request_queue *)data; - - trace_block_unplug_timer(q); - kblockd_schedule_work(q, &q->unplug_work); -} - -void blk_unplug(struct request_queue *q) -{ - /* - * devices don't necessarily have an ->unplug_fn defined - */ - if (q->unplug_fn) { - trace_block_unplug_io(q); - q->unplug_fn(q); - } -} -EXPORT_SYMBOL(blk_unplug); - /** * blk_start_queue - restart a previously stopped queue * @q: The &struct request_queue in question @@ -389,7 +271,6 @@ EXPORT_SYMBOL(blk_start_queue); **/ void blk_stop_queue(struct request_queue *q) { - blk_remove_plug(q); cancel_delayed_work(&q->delay_work); queue_flag_set(QUEUE_FLAG_STOPPED, q); } @@ -411,11 +292,10 @@ EXPORT_SYMBOL(blk_stop_queue); */ void blk_sync_queue(struct request_queue *q) { - del_timer_sync(&q->unplug_timer); del_timer_sync(&q->timeout); - cancel_work_sync(&q->unplug_work); throtl_shutdown_timer_wq(q); cancel_delayed_work_sync(&q->delay_work); + queue_sync_plugs(q); } EXPORT_SYMBOL(blk_sync_queue); @@ -430,14 +310,9 @@ EXPORT_SYMBOL(blk_sync_queue); */ void __blk_run_queue(struct request_queue *q) { - blk_remove_plug(q); - if (unlikely(blk_queue_stopped(q))) return; - if (elv_queue_empty(q)) - return; - /* * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. 
@@ -445,10 +320,8 @@ void __blk_run_queue(struct request_queue *q) if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { q->request_fn(q); queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else { - queue_flag_set(QUEUE_FLAG_PLUGGED, q); - kblockd_schedule_work(q, &q->unplug_work); - } + } else + queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); } EXPORT_SYMBOL(__blk_run_queue); @@ -535,8 +408,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q) return NULL; - q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; - q->backing_dev_info.unplug_io_data = q; q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; q->backing_dev_info.state = 0; @@ -556,13 +427,11 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, laptop_mode_timer_fn, (unsigned long) q); - init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); INIT_LIST_HEAD(&q->flush_queue[0]); INIT_LIST_HEAD(&q->flush_queue[1]); INIT_LIST_HEAD(&q->flush_data_in_flight); - INIT_WORK(&q->unplug_work, blk_unplug_work); INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -652,7 +521,6 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, q->request_fn = rfn; q->prep_rq_fn = NULL; q->unprep_rq_fn = NULL; - q->unplug_fn = generic_unplug_device; q->queue_flags = QUEUE_FLAG_DEFAULT; q->queue_lock = lock; @@ -910,8 +778,8 @@ out: } /* - * No available requests for this queue, unplug the device and wait for some - * requests to become available. + * No available requests for this queue, wait for some requests to become + * available. * * Called with q->queue_lock held, and returns with it unlocked. 
*/ @@ -932,7 +800,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, trace_block_sleeprq(q, bio, rw_flags & 1); - __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); io_schedule(); @@ -1058,7 +925,7 @@ static void add_acct_request(struct request_queue *q, struct request *rq, int where) { drive_stat_acct(rq, 1); - __elv_add_request(q, rq, where, 0); + __elv_add_request(q, rq, where); } /** @@ -2798,7 +2665,7 @@ static void flush_plug_list(struct blk_plug *plug) /* * rq is already accounted, so use raw insert */ - __elv_add_request(q, rq, ELEVATOR_INSERT_SORT, 0); + __elv_add_request(q, rq, ELEVATOR_INSERT_SORT); } if (q) { diff --git a/block/blk-exec.c b/block/blk-exec.c index cf1456a02acd..81e31819a597 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -54,8 +54,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, rq->end_io = done; WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); - __elv_add_request(q, rq, where, 1); - __generic_unplug_device(q); + __elv_add_request(q, rq, where); + __blk_run_queue(q); /* the queue is stopped so it won't be plugged+unplugged */ if (rq->cmd_type == REQ_TYPE_PM_RESUME) q->request_fn(q); diff --git a/block/blk-flush.c b/block/blk-flush.c index 1e2aa8a8908c..671fa9da7560 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -194,7 +194,6 @@ static void flush_end_io(struct request *flush_rq, int error) { struct request_queue *q = flush_rq->q; struct list_head *running = &q->flush_queue[q->flush_running_idx]; - bool was_empty = elv_queue_empty(q); bool queued = false; struct request *rq, *n; @@ -213,7 +212,7 @@ static void flush_end_io(struct request *flush_rq, int error) } /* after populating an empty queue, kick it to avoid stall */ - if (queued && was_empty) + if (queued) __blk_run_queue(q); } diff --git a/block/blk-settings.c b/block/blk-settings.c index 36c8c1f2af18..c8d68921dddb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -164,14 +164,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) blk_queue_congestion_threshold(q); q->nr_batching = BLK_BATCH_REQ; - q->unplug_thresh = 4; /* hmm */ - q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */ - if (q->unplug_delay == 0) - q->unplug_delay = 1; - - q->unplug_timer.function = blk_unplug_timeout; - q->unplug_timer.data = (unsigned long)q; - blk_set_default_limits(&q->limits); blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a89043a3caa4..b8dcdc2663a1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -800,7 +800,6 @@ out: if (nr_disp) { while((bio = bio_list_pop(&bio_list_on_stack))) generic_make_request(bio); - blk_unplug(q); } return nr_disp; } diff --git a/block/blk.h b/block/blk.h index 284b500852bd..49d21af81d07 100644 --- a/block/blk.h +++ b/block/blk.h @@ -18,8 +18,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); -void blk_unplug_work(struct work_struct *work); -void blk_unplug_timeout(unsigned long data); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 3202c7e87fb3..ef631539dd2a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -499,13 +499,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) } } -static 
int cfq_queue_empty(struct request_queue *q) -{ - struct cfq_data *cfqd = q->elevator->elevator_data; - - return !cfqd->rq_queued; -} - /* * Scale schedule slice based on io priority. Use the sync time slice only * if a queue is marked sync and has sync io queued. A sync queue with async @@ -4061,7 +4054,6 @@ static struct elevator_type iosched_cfq = { .elevator_add_req_fn = cfq_insert_request, .elevator_activate_req_fn = cfq_activate_request, .elevator_deactivate_req_fn = cfq_deactivate_request, - .elevator_queue_empty_fn = cfq_queue_empty, .elevator_completed_req_fn = cfq_completed_request, .elevator_former_req_fn = elv_rb_former_request, .elevator_latter_req_fn = elv_rb_latter_request, diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index b547cbca7b23..5139c0ea1864 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -326,14 +326,6 @@ dispatch_request: return 1; } -static int deadline_queue_empty(struct request_queue *q) -{ - struct deadline_data *dd = q->elevator->elevator_data; - - return list_empty(&dd->fifo_list[WRITE]) - && list_empty(&dd->fifo_list[READ]); -} - static void deadline_exit_queue(struct elevator_queue *e) { struct deadline_data *dd = e->elevator_data; @@ -445,7 +437,6 @@ static struct elevator_type iosched_deadline = { .elevator_merge_req_fn = deadline_merged_requests, .elevator_dispatch_fn = deadline_dispatch_requests, .elevator_add_req_fn = deadline_add_request, - .elevator_queue_empty_fn = deadline_queue_empty, .elevator_former_req_fn = elv_rb_former_request, .elevator_latter_req_fn = elv_rb_latter_request, .elevator_init_fn = deadline_init_queue, diff --git a/block/elevator.c b/block/elevator.c index 25713927c0d3..3ea208256e78 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -619,21 +619,12 @@ void elv_quiesce_end(struct request_queue *q) void elv_insert(struct request_queue *q, struct request *rq, int where) { - int unplug_it = 1; - trace_block_rq_insert(q, rq); rq->q = q; switch (where) { case ELEVATOR_INSERT_REQUEUE: - /* - * Most requeues happen because of a busy condition, - * don't force unplug of the queue for that case. - * Clear unplug_it and fall through. 
- */ - unplug_it = 0; - case ELEVATOR_INSERT_FRONT: rq->cmd_flags |= REQ_SOFTBARRIER; list_add(&rq->queuelist, &q->queue_head); @@ -679,24 +670,14 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) rq->cmd_flags |= REQ_SOFTBARRIER; blk_insert_flush(rq); break; - default: printk(KERN_ERR "%s: bad insertion point %d\n", __func__, where); BUG(); } - - if (unplug_it && blk_queue_plugged(q)) { - int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC] - - queue_in_flight(q); - - if (nrq >= q->unplug_thresh) - __generic_unplug_device(q); - } } -void __elv_add_request(struct request_queue *q, struct request *rq, int where, - int plug) +void __elv_add_request(struct request_queue *q, struct request *rq, int where) { BUG_ON(rq->cmd_flags & REQ_ON_PLUG); @@ -711,38 +692,20 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where, where == ELEVATOR_INSERT_SORT) where = ELEVATOR_INSERT_BACK; - if (plug) - blk_plug_device(q); - elv_insert(q, rq, where); } EXPORT_SYMBOL(__elv_add_request); -void elv_add_request(struct request_queue *q, struct request *rq, int where, - int plug) +void elv_add_request(struct request_queue *q, struct request *rq, int where) { unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __elv_add_request(q, rq, where, plug); + __elv_add_request(q, rq, where); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(elv_add_request); -int elv_queue_empty(struct request_queue *q) -{ - struct elevator_queue *e = q->elevator; - - if (!list_empty(&q->queue_head)) - return 0; - - if (e->ops->elevator_queue_empty_fn) - return e->ops->elevator_queue_empty_fn(q); - - return 1; -} -EXPORT_SYMBOL(elv_queue_empty); - struct request *elv_latter_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 232c4b38cd37..06389e9ef96d 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -39,13 +39,6 @@ static void noop_add_request(struct request_queue *q, struct request *rq) list_add_tail(&rq->queuelist, &nd->queue); } -static int noop_queue_empty(struct request_queue *q) -{ - struct noop_data *nd = q->elevator->elevator_data; - - return list_empty(&nd->queue); -} - static struct request * noop_former_request(struct request_queue *q, struct request *rq) { @@ -90,7 +83,6 @@ static struct elevator_type elevator_noop = { .elevator_merge_req_fn = noop_merged_requests, .elevator_dispatch_fn = noop_dispatch, .elevator_add_req_fn = noop_add_request, - .elevator_queue_empty_fn = noop_queue_empty, .elevator_former_req_fn = noop_former_request, .elevator_latter_req_fn = noop_latter_request, .elevator_init_fn = noop_init_queue, diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 9279272b3732..35658f445fca 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3170,12 +3170,6 @@ static void do_cciss_request(struct request_queue *q) int sg_index = 0; int chained = 0; - /* We call start_io here in case there is a command waiting on the - * queue that has not been sent. 
- */ - if (blk_queue_plugged(q)) - goto startio; - queue: creq = blk_peek_request(q); if (!creq) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 946dad4caef3..b2fceb53e809 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -911,9 +911,6 @@ static void do_ida_request(struct request_queue *q) struct scatterlist tmp_sg[SG_MAX]; int i, dir, seg; - if (blk_queue_plugged(q)) - goto startio; - queue_next: creq = blk_peek_request(q); if (!creq) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index ba95cba192be..2096628d6e65 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -689,8 +689,6 @@ void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) } } - drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); - /* always (try to) flush bitmap to stable storage */ drbd_md_flush(mdev); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index fd42832f785b..0645ca829a94 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -840,7 +840,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) for (i = 0; i < num_pages; i++) bm_page_io_async(mdev, b, i, rw); - drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 3803a0348937..0b5718e19586 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2382,20 +2382,6 @@ static inline int drbd_queue_order_type(struct drbd_conf *mdev) return QUEUE_ORDERED_NONE; } -static inline void drbd_blk_run_queue(struct request_queue *q) -{ - if (q && q->unplug_fn) - q->unplug_fn(q); -} - -static inline void drbd_kick_lo(struct drbd_conf *mdev) -{ - if (get_ldev(mdev)) { - drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev)); - put_ldev(mdev); - } -} - static inline void drbd_md_flush(struct drbd_conf *mdev) { int r; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 29cd0dc9fe4f..6049cb85310d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2719,35 +2719,6 @@ static int drbd_release(struct gendisk *gd, fmode_t mode) return 0; } -static void drbd_unplug_fn(struct request_queue *q) -{ - struct drbd_conf *mdev = q->queuedata; - - /* unplug FIRST */ - spin_lock_irq(q->queue_lock); - blk_remove_plug(q); - spin_unlock_irq(q->queue_lock); - - /* only if connected */ - spin_lock_irq(&mdev->req_lock); - if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) { - D_ASSERT(mdev->state.role == R_PRIMARY); - if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) { - /* add to the data.work queue, - * unless already queued. - * XXX this might be a good addition to drbd_queue_work - * anyways, to detect "double queuing" ... 
*/ - if (list_empty(&mdev->unplug_work.list)) - drbd_queue_work(&mdev->data.work, - &mdev->unplug_work); - } - } - spin_unlock_irq(&mdev->req_lock); - - if (mdev->state.disk >= D_INCONSISTENT) - drbd_kick_lo(mdev); -} - static void drbd_set_defaults(struct drbd_conf *mdev) { /* This way we get a compile error when sync_conf grows, @@ -3222,9 +3193,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor) blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); - q->queue_lock = &mdev->req_lock; /* needed since we use */ - /* plugging on a queue, that actually has no requests! */ - q->unplug_fn = drbd_unplug_fn; + q->queue_lock = &mdev->req_lock; mdev->md_io_page = alloc_page(GFP_KERNEL); if (!mdev->md_io_page) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 24487d4fb202..84132f8bf8a4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -187,15 +187,6 @@ static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int return NULL; } -/* kick lower level device, if we have more than (arbitrary number) - * reference counts on it, which typically are locally submitted io - * requests. don't use unacked_cnt, so we speed up proto A and B, too. */ -static void maybe_kick_lo(struct drbd_conf *mdev) -{ - if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark) - drbd_kick_lo(mdev); -} - static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) { struct drbd_epoch_entry *e; @@ -219,7 +210,6 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) LIST_HEAD(reclaimed); struct drbd_epoch_entry *e, *t; - maybe_kick_lo(mdev); spin_lock_irq(&mdev->req_lock); reclaim_net_ee(mdev, &reclaimed); spin_unlock_irq(&mdev->req_lock); @@ -436,8 +426,7 @@ void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) while (!list_empty(head)) { prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&mdev->req_lock); - drbd_kick_lo(mdev); - schedule(); + io_schedule(); finish_wait(&mdev->ee_wait, &wait); spin_lock_irq(&mdev->req_lock); } @@ -1147,7 +1136,6 @@ next_bio: drbd_generic_make_request(mdev, fault_type, bio); } while (bios); - maybe_kick_lo(mdev); return 0; fail: @@ -1167,9 +1155,6 @@ static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsign inc_unacked(mdev); - if (mdev->net_conf->wire_protocol != DRBD_PROT_C) - drbd_kick_lo(mdev); - mdev->current_epoch->barrier_nr = p->barrier; rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); @@ -3556,9 +3541,6 @@ static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - if (mdev->state.disk >= D_INCONSISTENT) - drbd_kick_lo(mdev); - /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ drbd_tcp_quickack(mdev->data.socket); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 11a75d32a2e2..ad3fc6228f27 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -960,10 +960,6 @@ allocate_barrier: bio_endio(req->private_bio, -EIO); } - /* we need to plug ALWAYS since we possibly need to kick lo_dev. 
- * we plug after submit, so we won't miss an unplug event */ - drbd_plug_device(mdev); - return 0; fail_conflicting: diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 34f224b018b3..e027446590d3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -792,7 +792,6 @@ int drbd_resync_finished(struct drbd_conf *mdev) * queue (or even the read operations for those packets * is not finished by now). Retry in 100ms. */ - drbd_kick_lo(mdev); __set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ / 10); w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h index defdb5013ea3..53586fa5ae1b 100644 --- a/drivers/block/drbd/drbd_wrappers.h +++ b/drivers/block/drbd/drbd_wrappers.h @@ -45,24 +45,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev, generic_make_request(bio); } -static inline void drbd_plug_device(struct drbd_conf *mdev) -{ - struct request_queue *q; - q = bdev_get_queue(mdev->this_bdev); - - spin_lock_irq(q->queue_lock); - -/* XXX the check on !blk_queue_plugged is redundant, - * implicitly checked in blk_plug_device */ - - if (!blk_queue_plugged(q)) { - blk_plug_device(q); - del_timer(&q->unplug_timer); - /* unplugging should not happen automatically... */ - } - spin_unlock_irq(q->queue_lock); -} - static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) { return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b9ba04fc2b34..271142b9e2cd 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3837,7 +3837,6 @@ static int __floppy_read_block_0(struct block_device *bdev) bio.bi_end_io = floppy_rb0_complete; submit_bio(READ, &bio); - generic_unplug_device(bdev_get_queue(bdev)); process_fd_request(); wait_for_completion(&complete); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 49e6a545eb63..01b8e4a87c9f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -541,17 +541,6 @@ out: return 0; } -/* - * kick off io on the underlying address space - */ -static void loop_unplug(struct request_queue *q) -{ - struct loop_device *lo = q->queuedata; - - queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); - blk_run_address_space(lo->lo_backing_file->f_mapping); -} - struct switch_request { struct file *file; struct completion wait; @@ -918,7 +907,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, */ blk_queue_make_request(lo->lo_queue, loop_make_request); lo->lo_queue->queuedata = lo; - lo->lo_queue->unplug_fn = loop_unplug; if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_flush(lo->lo_queue, REQ_FLUSH); @@ -1020,7 +1008,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) kthread_stop(lo->lo_thread); - lo->lo_queue->unplug_fn = NULL; lo->lo_backing_file = NULL; loop_release_xfer(lo); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 77d70eebb6b2..d20e13f80001 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1606,8 +1606,6 @@ static int kcdrwd(void *foobar) min_sleep_time = pkt->sleep_time; } - generic_unplug_device(bdev_get_queue(pd->bdev)); - VPRINTK("kcdrwd: sleeping\n"); residue = schedule_timeout(min_sleep_time); VPRINTK("kcdrwd: wake up\n"); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 8be57151f5d6..653439faa729 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -241,8 +241,7 
@@ static void dump_dmastat(struct cardinfo *card, unsigned int dmastat) * * Whenever IO on the active page completes, the Ready page is activated * and the ex-Active page is clean out and made Ready. - * Otherwise the Ready page is only activated when it becomes full, or - * when mm_unplug_device is called via the unplug_io_fn. + * Otherwise the Ready page is only activated when it becomes full. * * If a request arrives while both pages a full, it is queued, and b_rdev is * overloaded to record whether it was a read or a write. @@ -333,17 +332,6 @@ static inline void reset_page(struct mm_page *page) page->biotail = &page->bio; } -static void mm_unplug_device(struct request_queue *q) -{ - struct cardinfo *card = q->queuedata; - unsigned long flags; - - spin_lock_irqsave(&card->lock, flags); - if (blk_remove_plug(q)) - activate(card); - spin_unlock_irqrestore(&card->lock, flags); -} - /* * If there is room on Ready page, take * one bh off list and add it. @@ -535,7 +523,6 @@ static int mm_make_request(struct request_queue *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; - blk_plug_device(q); spin_unlock_irq(&card->lock); return 0; @@ -907,7 +894,6 @@ static int __devinit mm_pci_probe(struct pci_dev *dev, blk_queue_make_request(card->queue, mm_make_request); card->queue->queue_lock = &card->lock; card->queue->queuedata = card; - card->queue->unplug_fn = mm_unplug_device; tasklet_init(&card->tasklet, process_page, (unsigned long)card); diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index e88a2cf17711..6f218e014e99 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -233,8 +233,7 @@ int ide_queue_sense_rq(ide_drive_t *drive, void *special) drive->hwif->rq = NULL; - elv_add_request(drive->queue, &drive->sense_rq, - ELEVATOR_INSERT_FRONT, 0); + elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT); return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 999dac054bcc..f4077840d3ab 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -549,8 +549,6 @@ plug_device_2: if (rq) blk_requeue_request(q, rq); - if (!elv_queue_empty(q)) - blk_plug_device(q); } void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) @@ -562,8 +560,6 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq) if (rq) blk_requeue_request(q, rq); - if (!elv_queue_empty(q)) - blk_plug_device(q); spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 88a380c5a470..6ab9ab2a5081 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -52,7 +52,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) rq->cmd[0] = REQ_UNPARK_HEADS; rq->cmd_len = 1; rq->cmd_type = REQ_TYPE_SPECIAL; - elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1); + elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); out: return; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 9a35320fb59f..54bfc274b39a 100644 --