From f8a5b12247fe18f7fed801ad262a7ab190e1f848 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Dec 2016 09:24:51 -0700 Subject: blk-mq: make mq_ops a const pointer We never change it, make that clear. Signed-off-by: Jens Axboe Reviewed-by: Bart Van Assche --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index a8e67a155d04..79e1cb0f7b15 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -639,7 +639,7 @@ struct blk_mq_timeout_data { void blk_mq_rq_timed_out(struct request *req, bool reserved) { - struct blk_mq_ops *ops = req->q->mq_ops; + const struct blk_mq_ops *ops = req->q->mq_ops; enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; /* -- cgit v1.2.3 From c5082b70adfe8e1ea1cf4a8eff92c9f260e364d2 Mon Sep 17 00:00:00 2001 From: Alden Tondettar Date: Sun, 15 Jan 2017 15:31:56 -0700 Subject: partitions/efi: Fix integer overflow in GPT size calculation If a GUID Partition Table claims to have more than 2**25 entries, the calculation of the partition table size in alloc_read_gpt_entries() will overflow a 32-bit integer and not enough space will be allocated for the table. Nothing seems to get written out of bounds, but later efi_partition() will read up to 32768 bytes from a 128 byte buffer, possibly OOPSing or exposing information to /proc/partitions and uevents. The problem exists on both 64-bit and 32-bit platforms. Fix the overflow and also print a meaningful debug message if the table size is too large. 
Signed-off-by: Alden Tondettar Acked-by: Ard Biesheuvel Signed-off-by: Jens Axboe --- block/partitions/efi.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'block') diff --git a/block/partitions/efi.c b/block/partitions/efi.c index bcd86e5cd546..39f70d968754 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -293,7 +293,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, if (!gpt) return NULL; - count = le32_to_cpu(gpt->num_partition_entries) * + count = (size_t)le32_to_cpu(gpt->num_partition_entries) * le32_to_cpu(gpt->sizeof_partition_entry); if (!count) return NULL; @@ -352,7 +352,7 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, gpt_header **gpt, gpt_entry **ptes) { u32 crc, origcrc; - u64 lastlba; + u64 lastlba, pt_size; if (!ptes) return 0; @@ -434,13 +434,20 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, goto fail; } + /* Sanity check partition table size */ + pt_size = (u64)le32_to_cpu((*gpt)->num_partition_entries) * + le32_to_cpu((*gpt)->sizeof_partition_entry); + if (pt_size > KMALLOC_MAX_SIZE) { + pr_debug("GUID Partition Table is too large: %llu > %lu bytes\n", + (unsigned long long)pt_size, KMALLOC_MAX_SIZE); + goto fail; + } + if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) goto fail; /* Check the GUID Partition Entry Array CRC */ - crc = efi_crc32((const unsigned char *) (*ptes), - le32_to_cpu((*gpt)->num_partition_entries) * - le32_to_cpu((*gpt)->sizeof_partition_entry)); + crc = efi_crc32((const unsigned char *) (*ptes), pt_size); if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { pr_debug("GUID Partition Entry Array CRC check failed.\n"); -- cgit v1.2.3 From c51ca6cf545bc51ad38bd50816bde37c647d608d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 10 Dec 2016 15:13:59 -0700 Subject: block: move existing elevator ops to union Prep patch for adding MQ ops as well, since doing anon unions with named 
initializers doesn't work on older compilers. Signed-off-by: Jens Axboe Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval --- block/blk-ioc.c | 8 +++---- block/blk-merge.c | 4 ++-- block/blk.h | 10 ++++---- block/cfq-iosched.c | 2 +- block/deadline-iosched.c | 2 +- block/elevator.c | 60 ++++++++++++++++++++++++------------------------ block/noop-iosched.c | 2 +- 7 files changed, 44 insertions(+), 44 deletions(-) (limited to 'block') diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 381cb50a673c..ab372092a57d 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -43,8 +43,8 @@ static void ioc_exit_icq(struct io_cq *icq) if (icq->flags & ICQ_EXITED) return; - if (et->ops.elevator_exit_icq_fn) - et->ops.elevator_exit_icq_fn(icq); + if (et->ops.sq.elevator_exit_icq_fn) + et->ops.sq.elevator_exit_icq_fn(icq); icq->flags |= ICQ_EXITED; } @@ -383,8 +383,8 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); - if (et->ops.elevator_init_icq_fn) - et->ops.elevator_init_icq_fn(icq); + if (et->ops.sq.elevator_init_icq_fn) + et->ops.sq.elevator_init_icq_fn(icq); } else { kmem_cache_free(et->icq_cache, icq); icq = ioc_lookup_icq(ioc, q); diff --git a/block/blk-merge.c b/block/blk-merge.c index 182398cb1524..480570b691dc 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -763,8 +763,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_allow_rq_merge_fn) - if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next)) + if (e->type->ops.sq.elevator_allow_rq_merge_fn) + if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) return 0; return attempt_merge(q, rq, next); diff --git a/block/blk.h b/block/blk.h index 041185e5f129..f46c0ac8ae3d 100644 --- a/block/blk.h +++ 
b/block/blk.h @@ -167,7 +167,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) return NULL; } if (unlikely(blk_queue_bypass(q)) || - !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) + !q->elevator->type->ops.sq.elevator_dispatch_fn(q, 0)) return NULL; } } @@ -176,16 +176,16 @@ static inline void elv_activate_rq(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_activate_req_fn) - e->type->ops.elevator_activate_req_fn(q, rq); + if (e->type->ops.sq.elevator_activate_req_fn) + e->type->ops.sq.elevator_activate_req_fn(q, rq); } static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_deactivate_req_fn) - e->type->ops.elevator_deactivate_req_fn(q, rq); + if (e->type->ops.sq.elevator_deactivate_req_fn) + e->type->ops.sq.elevator_deactivate_req_fn(q, rq); } #ifdef CONFIG_FAIL_IO_TIMEOUT diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c73a6fcaeb9d..37aeb20fa454 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4837,7 +4837,7 @@ static struct elv_fs_entry cfq_attrs[] = { }; static struct elevator_type iosched_cfq = { - .ops = { + .ops.sq = { .elevator_merge_fn = cfq_merge, .elevator_merged_fn = cfq_merged_request, .elevator_merge_req_fn = cfq_merged_requests, diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 55e0bb6d7da7..05fc0ea25a98 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -439,7 +439,7 @@ static struct elv_fs_entry deadline_attrs[] = { }; static struct elevator_type iosched_deadline = { - .ops = { + .ops.sq = { .elevator_merge_fn = deadline_merge, .elevator_merged_fn = deadline_merged_request, .elevator_merge_req_fn = deadline_merged_requests, diff --git a/block/elevator.c b/block/elevator.c index 40f0c04e5ad3..022a26830297 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -58,8 +58,8 @@ static int 
elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio) struct request_queue *q = rq->q; struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_allow_bio_merge_fn) - return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio); + if (e->type->ops.sq.elevator_allow_bio_merge_fn) + return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio); return 1; } @@ -224,7 +224,7 @@ int elevator_init(struct request_queue *q, char *name) } } - err = e->ops.elevator_init_fn(q, e); + err = e->ops.sq.elevator_init_fn(q, e); if (err) elevator_put(e); return err; @@ -234,8 +234,8 @@ EXPORT_SYMBOL(elevator_init); void elevator_exit(struct elevator_queue *e) { mutex_lock(&e->sysfs_lock); - if (e->type->ops.elevator_exit_fn) - e->type->ops.elevator_exit_fn(e); + if (e->type->ops.sq.elevator_exit_fn) + e->type->ops.sq.elevator_exit_fn(e); mutex_unlock(&e->sysfs_lock); kobject_put(&e->kobj); @@ -443,8 +443,8 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) return ELEVATOR_BACK_MERGE; } - if (e->type->ops.elevator_merge_fn) - return e->type->ops.elevator_merge_fn(q, req, bio); + if (e->type->ops.sq.elevator_merge_fn) + return e->type->ops.sq.elevator_merge_fn(q, req, bio); return ELEVATOR_NO_MERGE; } @@ -495,8 +495,8 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_merged_fn) - e->type->ops.elevator_merged_fn(q, rq, type); + if (e->type->ops.sq.elevator_merged_fn) + e->type->ops.sq.elevator_merged_fn(q, rq, type); if (type == ELEVATOR_BACK_MERGE) elv_rqhash_reposition(q, rq); @@ -510,8 +510,8 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, struct elevator_queue *e = q->elevator; const int next_sorted = next->rq_flags & RQF_SORTED; - if (next_sorted && e->type->ops.elevator_merge_req_fn) - e->type->ops.elevator_merge_req_fn(q, rq, next); + if (next_sorted && e->type->ops.sq.elevator_merge_req_fn) + 
e->type->ops.sq.elevator_merge_req_fn(q, rq, next); elv_rqhash_reposition(q, rq); @@ -528,8 +528,8 @@ void elv_bio_merged(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_bio_merged_fn) - e->type->ops.elevator_bio_merged_fn(q, rq, bio); + if (e->type->ops.sq.elevator_bio_merged_fn) + e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio); } #ifdef CONFIG_PM @@ -578,7 +578,7 @@ void elv_drain_elevator(struct request_queue *q) lockdep_assert_held(q->queue_lock); - while (q->elevator->type->ops.elevator_dispatch_fn(q, 1)) + while (q->elevator->type->ops.sq.elevator_dispatch_fn(q, 1)) ; if (q->nr_sorted && printed++ < 10) { printk(KERN_ERR "%s: forced dispatching is broken " @@ -653,7 +653,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) * rq cannot be accessed after calling * elevator_add_req_fn. */ - q->elevator->type->ops.elevator_add_req_fn(q, rq); + q->elevator->type->ops.sq.elevator_add_req_fn(q, rq); break; case ELEVATOR_INSERT_FLUSH: @@ -682,8 +682,8 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_latter_req_fn) - return e->type->ops.elevator_latter_req_fn(q, rq); + if (e->type->ops.sq.elevator_latter_req_fn) + return e->type->ops.sq.elevator_latter_req_fn(q, rq); return NULL; } @@ -691,8 +691,8 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_former_req_fn) - return e->type->ops.elevator_former_req_fn(q, rq); + if (e->type->ops.sq.elevator_former_req_fn) + return e->type->ops.sq.elevator_former_req_fn(q, rq); return NULL; } @@ -701,8 +701,8 @@ int elv_set_request(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_set_req_fn) - return e->type->ops.elevator_set_req_fn(q, rq, bio, gfp_mask); + if 
(e->type->ops.sq.elevator_set_req_fn) + return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask); return 0; } @@ -710,16 +710,16 @@ void elv_put_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_put_req_fn) - e->type->ops.elevator_put_req_fn(rq); + if (e->type->ops.sq.elevator_put_req_fn) + e->type->ops.sq.elevator_put_req_fn(rq); } int elv_may_queue(struct request_queue *q, unsigned int op) { struct elevator_queue *e = q->elevator; - if (e->type->ops.elevator_may_queue_fn) - return e->type->ops.elevator_may_queue_fn(q, op); + if (e->type->ops.sq.elevator_may_queue_fn) + return e->type->ops.sq.elevator_may_queue_fn(q, op); return ELV_MQUEUE_MAY; } @@ -734,8 +734,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq) if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]--; if ((rq->rq_flags & RQF_SORTED) && - e->type->ops.elevator_completed_req_fn) - e->type->ops.elevator_completed_req_fn(q, rq); + e->type->ops.sq.elevator_completed_req_fn) + e->type->ops.sq.elevator_completed_req_fn(q, rq); } } @@ -803,8 +803,8 @@ int elv_register_queue(struct request_queue *q) } kobject_uevent(&e->kobj, KOBJ_ADD); e->registered = 1; - if (e->type->ops.elevator_registered_fn) - e->type->ops.elevator_registered_fn(q); + if (e->type->ops.sq.elevator_registered_fn) + e->type->ops.sq.elevator_registered_fn(q); } return error; } @@ -912,7 +912,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) spin_unlock_irq(q->queue_lock); /* allocate, init and register new elevator */ - err = new_e->ops.elevator_init_fn(q, new_e); + err = new_e->ops.sq.elevator_init_fn(q, new_e); if (err) goto fail_init; diff --git a/block/noop-iosched.c b/block/noop-iosched.c index a163c487cf38..2d1b15d89b45 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -92,7 +92,7 @@ static void noop_exit_queue(struct elevator_queue *e) } static struct elevator_type 
elevator_noop = { - .ops = { + .ops.sq = { .elevator_merge_req_fn = noop_merged_requests, .elevator_dispatch_fn = noop_dispatch, .elevator_add_req_fn = noop_add_request, -- cgit v1.2.3 From c23ecb426084a98418ee29124c139e37c274ad04 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Dec 2016 14:23:43 -0700 Subject: block: move rq_ioc() to blk.h We want to use it outside of blk-core.c. Signed-off-by: Jens Axboe Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval --- block/blk-core.c | 16 ---------------- block/blk.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 61ba08c58b64..92baea07acbc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1039,22 +1039,6 @@ static bool blk_rq_should_init_elevator(struct bio *bio) return true; } -/** - * rq_ioc - determine io_context for request allocation - * @bio: request being allocated is for this bio (can be %NULL) - * - * Determine io_context to use for request allocation for @bio. May return - * %NULL if %current->io_context doesn't exist. - */ -static struct io_context *rq_ioc(struct bio *bio) -{ -#ifdef CONFIG_BLK_CGROUP - if (bio && bio->bi_ioc) - return bio->bi_ioc; -#endif - return current->io_context; -} - /** * __get_request - get a free request * @rl: request list to allocate from diff --git a/block/blk.h b/block/blk.h index f46c0ac8ae3d..9a716b5925a4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -263,6 +263,22 @@ void ioc_clear_queue(struct request_queue *q); int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node); +/** + * rq_ioc - determine io_context for request allocation + * @bio: request being allocated is for this bio (can be %NULL) + * + * Determine io_context to use for request allocation for @bio. May return + * %NULL if %current->io_context doesn't exist. 
+ */ +static inline struct io_context *rq_ioc(struct bio *bio) +{ +#ifdef CONFIG_BLK_CGROUP + if (bio && bio->bi_ioc) + return bio->bi_ioc; +#endif + return current->io_context; +} + /** * create_io_context - try to create task->io_context * @gfp_mask: allocation mask -- cgit v1.2.3 From 16a3c2a70cad5ccdc2dc0a4544bff82554807493 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Dec 2016 14:27:46 -0700 Subject: blk-mq: un-export blk_mq_free_hctx_request() It's only used in blk-mq, kill it from the main exported header and kill the symbol export as well. Signed-off-by: Jens Axboe Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval --- block/blk-mq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 79e1cb0f7b15..f49f6325b332 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -337,15 +337,14 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, blk_queue_exit(q); } -void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq) +static void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, + struct request *rq) { struct blk_mq_ctx *ctx = rq->mq_ctx; ctx->rq_completed[rq_is_sync(rq)]++; __blk_mq_free_request(hctx, ctx, rq); - } -EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request); void blk_mq_free_request(struct request *rq) { -- cgit v1.2.3 From 2c3ad667902ef6f4b60ef0a3c6f7d8c2b007769a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Dec 2016 14:34:47 -0700 Subject: blk-mq: export some helpers we need to the scheduling framework Signed-off-by: Jens Axboe Reviewed-by: Johannes Thumshirn Reviewed-by: Omar Sandoval --- block/blk-mq.c | 39 +++++++++++++++++++++------------------ block/blk-mq.h | 25 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 18 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index f49f6325b332..9fc521755e22 100644 --- 
a/block/blk-mq.c +++ b/block/blk-mq.c @@ -167,8 +167,8 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) } EXPORT_SYMBOL(blk_mq_can_queue); -static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, - struct request *rq, unsigned int op) +void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, + struct request *rq, unsigned int op) { INIT_LIST_HEAD(&rq->queuelist); /* csd/requeue_work/fifo_time is initialized before use */ @@ -213,9 +213,10 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, ctx->rq_dispatched[op_is_sync(op)]++; } +EXPORT_SYMBOL_GPL(blk_mq_rq_ctx_init); -static struct request * -__blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) +struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, + unsigned int op) { struct request *rq; unsigned int tag; @@ -236,6 +237,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, unsigned int op) return NULL; } +EXPORT_SYMBOL_GPL(__blk_mq_alloc_request); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, unsigned int flags) @@ -319,8 +321,8 @@ out_queue_exit: } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, struct request *rq) +void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + struct request *rq) { const int tag = rq->tag; struct request_queue *q = rq->q; @@ -802,7 +804,7 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data) * Process software queues that have been marked busy, splicing them * to the for-dispatch */ -static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) +void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) { struct flush_busy_ctx_data data = { .hctx = hctx, @@ -811,6 +813,7 @@ static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) 
sbitmap_for_each_set(&hctx->ctx_map, flush_busy_ctx, &data); } +EXPORT_SYMBOL_GPL(blk_mq_flush_busy_ctxs); static inline unsigned int queued_to_index(unsigned int queued) { @@ -921,7 +924,7 @@ static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx) /* * Touch any software queue that has pending entries. */ - flush_busy_ctxs(hctx, &rq_list); + blk_mq_flush_busy_ctxs(hctx, &rq_list); /* * If we have previous entries on our dispatch list, grab them @@ -1135,8 +1138,8 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, list_add_tail(&rq->queuelist, &ctx->rq_list); } -static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, - struct request *rq, bool at_head) +void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool at_head) { struct blk_mq_ctx *ctx = rq->mq_ctx; @@ -1550,8 +1553,8 @@ run_queue: return cookie; } -static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, - struct blk_mq_tags *tags, unsigned int hctx_idx) +void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx) { struct page *page; @@ -1588,8 +1591,8 @@ static size_t order_to_size(unsigned int order) return (size_t)PAGE_SIZE << order; } -static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, - unsigned int hctx_idx) +struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, + unsigned int hctx_idx) { struct blk_mq_tags *tags; unsigned int i, j, entries_per_page, max_order = 4; @@ -2279,10 +2282,10 @@ static int blk_mq_queue_reinit_dead(unsigned int cpu) * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list * and set bit0 in pending bitmap as ctx1->index_hw is still zero. * - * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in - * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. - * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list - * is ignored. 
+ * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set + * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. + * But hctx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is + * ignored. */ static int blk_mq_queue_reinit_prepare(unsigned int cpu) { diff --git a/block/blk-mq.h b/block/blk-mq.h index 63e9116cddbd..e59f5ca520a2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -32,6 +32,21 @@ void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *); +void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); + +/* + * Internal helpers for allocating/freeing the request map + */ +void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx); +struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, + unsigned int hctx_idx); + +/* + * Internal helpers for request insertion into sw queues + */ +void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool at_head); /* * CPU hotplug helpers @@ -103,6 +118,16 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, data->hctx = hctx; } +/* + * Internal helpers for request allocation/init/free + */ +void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, + struct request *rq, unsigned int op); +void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, + struct request *rq); +struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, + unsigned int op); + static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) { return test_bit(BLK_MQ_S_STOPPED, &hctx->state); -- cgit v1.2.3 From 4941115bef2bc891aa00a2f0edeaf06dc982325a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jan 2017 08:09:05 -0700
Subject: blk-mq-tag: cleanup the normal/reserved tag allocation This is in preparation for having another tag set available. Cleanup the parameters, and allow passing in of tags for blk_mq_put_tag(). Signed-off-by: Jens Axboe [hch: even more cleanups] Signed-off-by: Christoph Hellwig Reviewed-by: Omar Sandoval --- block/blk-mq-tag.c | 94 +++++++++++++++++++++--------------------------------- block/blk-mq-tag.h | 4 +-- block/blk-mq.c | 2 +- block/blk-mq.h | 5 +++ 4 files changed, 44 insertions(+), 61 deletions(-) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index dcf5ce3ba4bf..ced752716878 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -90,32 +90,46 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, return atomic_read(&hctx->nr_active) < depth; } -static int __bt_get(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) +static int __blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt) { if (!hctx_may_queue(hctx, bt)) return -1; return __sbitmap_queue_get(bt); } -static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, - struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags) +unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) { + struct blk_mq_tags *tags = blk_mq_tags_from_data(data); + struct sbitmap_queue *bt; struct sbq_wait_state *ws; DEFINE_WAIT(wait); + unsigned int tag_offset; int tag; - tag = __bt_get(hctx, bt); + if (data->flags & BLK_MQ_REQ_RESERVED) { + if (unlikely(!tags->nr_reserved_tags)) { + WARN_ON_ONCE(1); + return BLK_MQ_TAG_FAIL; + } + bt = &tags->breserved_tags; + tag_offset = 0; + } else { + bt = &tags->bitmap_tags; + tag_offset = tags->nr_reserved_tags; + } + + tag = __blk_mq_get_tag(data->hctx, bt); if (tag != -1) - return tag; + goto found_tag; if (data->flags & BLK_MQ_REQ_NOWAIT) - return -1; + return BLK_MQ_TAG_FAIL; - ws = bt_wait_ptr(bt, hctx); + ws = bt_wait_ptr(bt, data->hctx); do { prepare_to_wait(&ws->wait, &wait, TASK_UNINTERRUPTIBLE); 
- tag = __bt_get(hctx, bt); + tag = __blk_mq_get_tag(data->hctx, bt); if (tag != -1) break; @@ -125,14 +139,14 @@ static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, * some to complete. Note that hctx can be NULL here for * reserved tag allocation. */ - if (hctx) - blk_mq_run_hw_queue(hctx, false); + if (data->hctx) + blk_mq_run_hw_queue(data->hctx, false); /* * Retry tag allocation after running the hardware queue, * as running the queue may also have found completions. */ - tag = __bt_get(hctx, bt); + tag = __blk_mq_get_tag(data->hctx, bt); if (tag != -1) break; @@ -142,61 +156,25 @@ static int bt_get(struct blk_mq_alloc_data *data, struct sbitmap_queue *bt, data->ctx = blk_mq_get_ctx(data->q); data->hctx = blk_mq_map_queue(data->q, data->ctx->cpu); - if (data->flags & BLK_MQ_REQ_RESERVED) { - bt = &data->hctx->tags->breserved_tags; - } else { - hctx = data->hctx; - bt = &hctx->tags->bitmap_tags; - } + tags = blk_mq_tags_from_data(data); + if (data->flags & BLK_MQ_REQ_RESERVED) + bt = &tags->breserved_tags; + else + bt = &tags->bitmap_tags; + finish_wait(&ws->wait, &wait); - ws = bt_wait_ptr(bt, hctx); + ws = bt_wait_ptr(bt, data->hctx); } while (1); finish_wait(&ws->wait, &wait); - return tag; -} - -static unsigned int __blk_mq_get_tag(struct blk_mq_alloc_data *data) -{ - int tag; - - tag = bt_get(data, &data->hctx->tags->bitmap_tags, data->hctx, - data->hctx->tags); - if (tag >= 0) - return tag + data->hctx->tags->nr_reserved_tags; - - return BLK_MQ_TAG_FAIL; -} - -static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data) -{ - int tag; - if (unlikely(!data->hctx->tags->nr_reserved_tags)) { - WARN_ON_ONCE(1); - return BLK_MQ_TAG_FAIL; - } - - tag = bt_get(data, &data->hctx->tags->breserved_tags, NULL, - data->hctx->tags); - if (tag < 0) - return BLK_MQ_TAG_FAIL; - - return tag; +found_tag: + return tag + tag_offset; } -unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data) +void blk_mq_put_tag(struct blk_mq_hw_ctx 
*hctx, struct blk_mq_tags *tags, + struct blk_mq_ctx *ctx, unsigned int tag) { - if (data->flags & BLK_MQ_REQ_RESERVED) - return __blk_mq_get_reserved_tag(data); - return __blk_mq_get_tag(data); -} - -void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - unsigned int tag) -{ - struct blk_mq_tags *tags = hctx->tags; - if (tag >= tags->nr_reserved_tags) { const int real_tag = tag - tags->nr_reserved_tags; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index d1662734dc53..923602dd3bfb 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -24,8 +24,8 @@ extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int r extern void blk_mq_free_tags(struct blk_mq_tags *tags); extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); -extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, - unsigned int tag); +extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags, + struct blk_mq_ctx *ctx, unsigned int tag); extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); diff --git a/block/blk-mq.c b/block/blk-mq.c index 9fc521755e22..6fab8e9c724f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -335,7 +335,7 @@ void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); clear_bit(REQ_ATOM_POLL_SLEPT, &rq->atomic_flags); - blk_mq_put_tag(hctx, ctx, tag); + blk_mq_put_tag(hctx, hctx->tags, ctx, tag); blk_queue_exit(q); } diff --git a/block/blk-mq.h b/block/blk-mq.h index e59f5ca520a2..48b7771eb192 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -118,6 +118,11 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, data->hctx = hctx; } +static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) +{ 
+ return data->hctx->tags; +} + /* * Internal helpers for request allocation/init/free */ -- cgit v1.2.3 From cc71a6f43886a8af57dbbce2a45b4b2aaf570fe6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Jan 2017 14:29:56 -0700 Subject: blk-mq: abstract out helpers for allocating/freeing tag maps Prep patch for adding an extra tag map for scheduler requests. Signed-off-by: Jens Axboe Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval --- block/blk-mq.c | 117 ++++++++++++++++++++++++++++++++++++--------------------- block/blk-mq.h | 14 ++++--- 2 files changed, 83 insertions(+), 48 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index 6fab8e9c724f..fcdeadc55753 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1553,8 +1553,8 @@ run_queue: return cookie; } -void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, - unsigned int hctx_idx) +void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx) { struct page *page; @@ -1580,33 +1580,30 @@ void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, kmemleak_free(page_address(page)); __free_pages(page, page->private); } +} +void blk_mq_free_rq_map(struct blk_mq_tags *tags) +{ kfree(tags->rqs); + tags->rqs = NULL; blk_mq_free_tags(tags); } -static size_t order_to_size(unsigned int order) -{ - return (size_t)PAGE_SIZE << order; -} - -struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, - unsigned int hctx_idx) +struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, + unsigned int hctx_idx, + unsigned int nr_tags, + unsigned int reserved_tags) { struct blk_mq_tags *tags; - unsigned int i, j, entries_per_page, max_order = 4; - size_t rq_size, left; - tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, + tags = blk_mq_init_tags(nr_tags, reserved_tags, set->numa_node, BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); if (!tags) return NULL; - INIT_LIST_HEAD(&tags->page_list); 
- - tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), + tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, set->numa_node); if (!tags->rqs) { @@ -1614,15 +1611,31 @@ struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, return NULL; } + return tags; +} + +static size_t order_to_size(unsigned int order) +{ + return (size_t)PAGE_SIZE << order; +} + +int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx, unsigned int depth) +{ + unsigned int i, j, entries_per_page, max_order = 4; + size_t rq_size, left; + + INIT_LIST_HEAD(&tags->page_list); + /* * rq_size is the size of the request plus driver payload, rounded * to the cacheline size */ rq_size = round_up(sizeof(struct request) + set->cmd_size, cache_line_size()); - left = rq_size * set->queue_depth; + left = rq_size * depth; - for (i = 0; i < set->queue_depth; ) { + for (i = 0; i < depth; ) { int this_order = max_order; struct page *page; int to_do; @@ -1656,7 +1669,7 @@ struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, */ kmemleak_alloc(p, order_to_size(this_order), 1, GFP_NOIO); entries_per_page = order_to_size(this_order) / rq_size; - to_do = min(entries_per_page, set->queue_depth - i); + to_do = min(entries_per_page, depth - i); left -= to_do * rq_size; for (j = 0; j < to_do; j++) { tags->rqs[i] = p; @@ -1673,11 +1686,11 @@ struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, i++; } } - return tags; + return 0; fail: - blk_mq_free_rq_map(set, tags, hctx_idx); - return NULL; + blk_mq_free_rqs(set, tags, hctx_idx); + return -ENOMEM; } /* @@ -1869,6 +1882,33 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, } } +static bool __blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, int hctx_idx) +{ + int ret = 0; + + set->tags[hctx_idx] = blk_mq_alloc_rq_map(set, hctx_idx, + set->queue_depth, set->reserved_tags); + if (!set->tags[hctx_idx]) + return 
false; + + ret = blk_mq_alloc_rqs(set, set->tags[hctx_idx], hctx_idx, + set->queue_depth); + if (!ret) + return true; + + blk_mq_free_rq_map(set->tags[hctx_idx]); + set->tags[hctx_idx] = NULL; + return false; +} + +static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, + unsigned int hctx_idx) +{ + blk_mq_free_rqs(set, set->tags[hctx_idx], hctx_idx); + blk_mq_free_rq_map(set->tags[hctx_idx]); + set->tags[hctx_idx] = NULL; +} + static void blk_mq_map_swqueue(struct request_queue *q, const struct cpumask *online_mask) { @@ -1897,17 +1937,15 @@ static void blk_mq_map_swqueue(struct request_queue *q, hctx_idx = q->mq_map[i]; /* unmapped hw queue can be remapped after CPU topo changed */ - if (!set->tags[hctx_idx]) { - set->tags[hctx_idx] = blk_mq_init_rq_map(set, hctx_idx); - + if (!set->tags[hctx_idx] && + !__blk_mq_alloc_rq_map(set, hctx_idx)) { /* * If tags initialization fail for some hctx, * that hctx won't be brought online. In this * case, remap the current ctx to hctx[0] which * is guaranteed to always have tags allocated */ - if (!set->tags[hctx_idx]) - q->mq_map[i] = 0; + q->mq_map[i] = 0; } ctx = per_cpu_ptr(q->queue_ctx, i); @@ -1930,10 +1968,9 @@ static void blk_mq_map_swqueue(struct request_queue *q, * fallback in case of a new remap fails * allocation */ - if (i && set->tags[i]) { - blk_mq_free_rq_map(set, set->tags[i], i); - set->tags[i] = NULL; - } + if (i && set->tags[i]) + blk_mq_free_map_and_requests(set, i); + hctx->tags = NULL; continue; } @@ -2100,10 +2137,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx = hctxs[j]; if (hctx) { - if (hctx->tags) { - blk_mq_free_rq_map(set, hctx->tags, j); - set->tags[j] = NULL; - } + if (hctx->tags) + blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); free_cpumask_var(hctx->cpumask); kobject_put(&hctx->kobj); @@ -2299,17 +2334,15 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) { int i; - for (i = 0; i < 
set->nr_hw_queues; i++) { - set->tags[i] = blk_mq_init_rq_map(set, i); - if (!set->tags[i]) + for (i = 0; i < set->nr_hw_queues; i++) + if (!__blk_mq_alloc_rq_map(set, i)) goto out_unwind; - } return 0; out_unwind: while (--i >= 0) - blk_mq_free_rq_map(set, set->tags[i], i); + blk_mq_free_rq_map(set->tags[i]); return -ENOMEM; } @@ -2433,10 +2466,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) { int i; - for (i = 0; i < nr_cpu_ids; i++) { - if (set->tags[i]) - blk_mq_free_rq_map(set, set->tags[i], i); - } + for (i = 0; i < nr_cpu_ids; i++) + blk_mq_free_map_and_requests(set, i); kfree(set->mq_map); set->mq_map = NULL; diff --git a/block/blk-mq.h b/block/blk-mq.h index 48b7771eb192..1b279b02d0f6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -37,17 +37,21 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); /* * Internal helpers for allocating/freeing the request map */ -void blk_mq_free_rq_map(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, - unsigned int hctx_idx); -struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, - unsigned int hctx_idx); +void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx); +void blk_mq_free_rq_map(struct blk_mq_tags *tags); +struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, + unsigned int hctx_idx, + unsigned int nr_tags, + unsigned int reserved_tags); +int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, + unsigned int hctx_idx, unsigned int depth); /* * Internal helpers for request insertion into sw queues */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); - /* * CPU hotplug helpers */ -- cgit v1.2.3 From fd2d332677c687ca90c12a47d6c377c547100b56 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 12 Jan 2017 10:04:45 -0700 Subject: blk-mq: add support for carrying internal tag information in blk_qc_t No functional change in this patch, just in 
preparation for having two types of tags available to the block layer for a single request. Signed-off-by: Jens Axboe Reviewed-by: Omar Sandoval --- block/blk-mq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-mq.c b/block/blk-mq.c index fcdeadc55753..d40be641f3d5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1308,6 +1308,11 @@ static struct request *blk_mq_map_request(struct request_queue *q, return rq; } +static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) +{ + return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false); +} + static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) { int ret; @@ -1318,7 +1323,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) .list = NULL, .last = 1 }; - blk_qc_t new_cookie = blk_tag_to_qc_t(rq->tag, hctx->queue_num); + blk_qc_t new_cookie = request_to_qc_t(hctx, rq); if (blk_mq_hctx_stopped(hctx)) goto insert; @@ -1387,7 +1392,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) wbt_track(&rq->issue_stat, wb_acct); - cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); + cookie = request_to_qc_t(data.hctx, rq); if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); @@ -1496,7 +1501,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) wbt_track(&rq->issue_stat, wb_acct); - cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num); + cookie = request_to_qc_t(data.hctx, rq); if (unlikely(is_flush_fua)) { blk_mq_bio_to_request(rq, bio); -- cgit v1.2.3 From 2af8cbe30531eca73c8f3ba277f155fc0020b01a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jan 2017 14:39:30 -0700 Subject: blk-mq: split tag ->rqs[] into two This is in preparation for having two sets of tags available. For that we need a static index, and a dynamically assignable one. 
Signed-off-by: Jens Axboe Reviewed-by: Omar Sandoval --- block/blk-mq-tag.c | 4 ++-- block/blk-mq-tag.h | 1 + block/blk-mq.c | 30 +++++++++++++++++++++++------- 3 files changed, 26 insertions(+), 9 deletions(-) (limited to 'block') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index ced752716878..9753747a34a2 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -290,11 +290,11 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) struct blk_mq_tags *tags = set->tags[i]; for (j = 0; j < tags->nr_tags; j++) { - if (!tags->rqs[j]) + if (!tags->static_rqs[j]) continue; ret = set->ops->reinit_request(set->driver_data, - tags->rqs[j]); + tags->static_rqs[j]); if (ret) goto out; } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 923602dd3bfb..41cd15fd1afd 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -16,6 +16,7 @@ struct blk_mq_tags { struct sbitmap_queue breserved_tags; struct request **rqs; + struct request **static_rqs; struct list_head page_list; }; diff --git a/block/blk-mq.c b/block/blk-mq.c index d40be641f3d5..89b81254201b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -223,7 +223,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, tag = blk_mq_get_tag(data); if (tag != BLK_MQ_TAG_FAIL) { - rq = data->hctx->tags->rqs[tag]; + rq = data->hctx->tags->static_rqs[tag]; if (blk_mq_tag_busy(data->hctx)) { rq->rq_flags = RQF_MQ_INFLIGHT; @@ -231,6 +231,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, } rq->tag = tag; + data->hctx->tags->rqs[tag] = rq; blk_mq_rq_ctx_init(data->q, data->ctx, rq, op); return rq; } @@ -1567,11 +1568,13 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, int i; for (i = 0; i < tags->nr_tags; i++) { - if (!tags->rqs[i]) + struct request *rq = tags->static_rqs[i]; + + if (!rq) continue; - set->ops->exit_request(set->driver_data, tags->rqs[i], + set->ops->exit_request(set->driver_data, rq, hctx_idx, i); - tags->rqs[i] = NULL; 
+ tags->static_rqs[i] = NULL; } } @@ -1591,6 +1594,8 @@ void blk_mq_free_rq_map(struct blk_mq_tags *tags) { kfree(tags->rqs); tags->rqs = NULL; + kfree(tags->static_rqs); + tags->static_rqs = NULL; blk_mq_free_tags(tags); } @@ -1616,6 +1621,15 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, return NULL; } + tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *), + GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, + set->numa_node); + if (!tags->static_rqs) { + kfree(tags->rqs); + blk_mq_free_tags(tags); + return NULL; + } + return tags; } @@ -1677,12 +1691,14 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, to_do = min(entries_per_page, depth - i); left -= to_do * rq_size; for (j = 0; j < to_do; j++) { - tags->rqs[i] = p; + struct request *rq = p; + + tags->static_rqs[i] = rq; if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, - tags->rqs[i], hctx_idx, i, + rq, hctx_idx, i, set->numa_node)) { - tags->rqs[i] = NULL; + tags->static_rqs[i] = NULL; goto fail; } } -- cgit v1.2.3 From bd166ef183c263c5ced656d49ef19c7da4adc774 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Jan 2017 06:03:22 -0700 Subject: blk-mq-sched: add framework for MQ capable IO schedulers This adds a set of hooks that intercept the blk-mq path of allocating/inserting/issuing/completing requests, allowing us to develop a scheduler within that framework. We reuse the existing elevator scheduler API on the registration side, but augment that with the scheduler flagging support for the blk-mq interface, and with a separate set of ops hooks for MQ devices. We split driver and scheduler tags, so we can run the scheduling independently of device queue depth. 
Signed-off-by: Jens Axboe Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval --- block/Makefile | 2 +- block/blk-cgroup.c | 24 +++- block/blk-core.c | 4 +- block/blk-exec.c | 3 +- block/blk-flush.c | 12 +- block/blk-ioc.c | 8 +- block/blk-merge.c | 2 +- block/blk-mq-sched.c | 368 +++++++++++++++++++++++++++++++++++++++++++++++++++ block/blk-mq-sched.h | 170 ++++++++++++++++++++++++ block/blk-mq-sysfs.c | 13 ++ block/blk-mq.c | 318 ++++++++++++++++++++++++++------------------ block/blk-mq.h | 8 +- block/blk-tag.c | 1 + block/elevator.c | 204 +++++++++++++++++++++------- 14 files changed, 945 insertions(+), 192 deletions(-) create mode 100644 block/blk-mq-sched.c create mode 100644 block/blk-mq-sched.h (limited to 'block') diff --git a/block/Makefile b/block/Makefile index a827f988c4e6..2eee9e1bb6db 100644 --- a/block/Makefile +++ b/block/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ - blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ + blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8ba0af780e88..2630f64bed19 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1223,7 +1223,11 @@ int blkcg_activate_policy(struct request_queue *q, if (blkcg_policy_enabled(q, pol)) return 0; - blk_queue_bypass_start(q); + if (q->mq_ops) { + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + } else + blk_queue_bypass_start(q); pd_prealloc: if (!pd_prealloc) { pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); @@ -1261,7 +1265,10 @@ pd_prealloc: spin_unlock_irq(q->queue_lock); out_bypass_end: - blk_queue_bypass_end(q); + if (q->mq_ops) + blk_mq_unfreeze_queue(q); + else + blk_queue_bypass_end(q); if (pd_prealloc) 
pol->pd_free_fn(pd_prealloc); return ret; @@ -1284,7 +1291,12 @@ void blkcg_deactivate_policy(struct request_queue *q, if (!blkcg_policy_enabled(q, pol)) return; - blk_queue_bypass_start(q); + if (q->mq_ops) { + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + } else + blk_queue_bypass_start(q); + spin_lock_irq(q->queue_lock); __clear_bit(pol->plid, q->blkcg_pols); @@ -1304,7 +1316,11 @@ void blkcg_deactivate_policy(struct request_queue *q, } spin_unlock_irq(q->queue_lock); - blk_queue_bypass_end(q); + + if (q->mq_ops) + blk_mq_unfreeze_queue(q); + else + blk_queue_bypass_end(q); } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); diff --git a/block/blk-core.c b/block/blk-core.c index 92baea07acbc..a61f1407f4f6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,6 +39,7 @@ #include "blk.h" #include "blk-mq.h" +#include "blk-mq-sched.h" #include "blk-wbt.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); @@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->cmd = rq->__cmd; rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; + rq->internal_tag = -1; rq->start_time = jiffies; set_start_time_ns(rq); rq->part = NULL; @@ -2127,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) if (q->mq_ops) { if (blk_queue_io_stat(q)) blk_account_io_start(rq, true); - blk_mq_insert_request(rq, false, true, false); + blk_mq_sched_insert_request(rq, false, true, false); return 0; } diff --git a/block/blk-exec.c b/block/blk-exec.c index 3ecb00a6cf45..86656fdfa637 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -9,6 +9,7 @@ #include #include "blk.h" +#include "blk-mq-sched.h" /* * for max sense size @@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be reused after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(rq, at_head, true, false); + blk_mq_sched_insert_request(rq, at_head, true, false); return; } diff --git a/block/blk-flush.c b/block/blk-flush.c index 
20b7c7a02f1c..d7de34ee39c2 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -74,6 +74,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-tag.h" +#include "blk-mq-sched.h" /* FLUSH/FUA sequences */ enum { @@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error) * the comment in flush_end_io(). */ spin_lock_irqsave(&fq->mq_flush_lock, flags); - if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) - blk_mq_run_hw_queue(hctx, true); + blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error); spin_unlock_irqrestore(&fq->mq_flush_lock, flags); + + blk_mq_run_hw_queue(hctx, true); } /** @@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - if (q->mq_ops) { - blk_mq_insert_request(rq, false, true, false); - } else + if (q->mq_ops) + blk_mq_sched_insert_request(rq, false, true, false); + else list_add_tail(&rq->queuelist, &q->queue_head); return; } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index ab372092a57d..fe186a9eade9 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -43,7 +43,9 @@ static void ioc_exit_icq(struct io_cq *icq) if (icq->flags & ICQ_EXITED) return; - if (et->ops.sq.elevator_exit_icq_fn) + if (et->uses_mq && et->ops.mq.exit_icq) + et->ops.mq.exit_icq(icq); + else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn) et->ops.sq.elevator_exit_icq_fn(icq); icq->flags |= ICQ_EXITED; @@ -383,7 +385,9 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { hlist_add_head(&icq->ioc_node, &ioc->icq_list); list_add(&icq->q_node, &q->icq_list); - if (et->ops.sq.elevator_init_icq_fn) + if (et->uses_mq && et->ops.mq.init_icq) + et->ops.mq.init_icq(icq); + else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn) et->ops.sq.elevator_init_icq_fn(icq); } else { kmem_cache_free(et->icq_cache, icq); diff --git a/block/blk-merge.c 
b/block/blk-merge.c index 480570b691dc..6aa43dec5af4 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -763,7 +763,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, { struct elevator_queue *e = q->elevator; - if (e->type->ops.sq.elevator_allow_rq_merge_fn) + if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn) if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) return 0; diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c new file mode 100644 index 000000000000..26759798a0b3 --- /dev/null +++ b/block/blk-mq-sched.c @@ -0,0 +1,368 @@ +/* + * blk-mq scheduling framework + * + * Copyright (C) 2016 Jens Axboe + */ +#include +#include +#include + +#include + +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-sched.h" +#include "blk-mq-tag.h" +#include "blk-wbt.h" + +void blk_mq_sched_free_hctx_data(struct request_queue *q, + void (*exit)(struct blk_mq_hw_ctx *)) +{ + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + if (exit && hctx->sched_data) + exit(hctx); + kfree(hctx->sched_data); + hctx->sched_data = NULL; + } +} +EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data); + +int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size, + int (*init)(struct blk_mq_hw_ctx *), + void (*exit)(struct blk_mq_hw_ctx *)) +{ + struct blk_mq_hw_ctx *hctx; + int ret; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node); + if (!hctx->sched_data) { + ret = -ENOMEM; + goto error; + } + + if (init) { + ret = init(hctx); + if (ret) { + /* + * We don't want to give exit() a partially + * initialized sched_data. init() must clean up + * if it fails. 
+ */ + kfree(hctx->sched_data); + hctx->sched_data = NULL; + goto error; + } + } + } + + return 0; +error: + blk_mq_sched_free_hctx_data(q, exit); + return ret; +} +EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data); + +static void __blk_mq_sched_assign_ioc(struct request_queue *q, + struct request *rq, struct io_context *ioc) +{ + struct io_cq *icq; + + spin_lock_irq(q->queue_lock); + icq = ioc_lookup_icq(ioc, q); + spin_unlock_irq(q->queue_lock); + + if (!icq) { + icq = ioc_create_icq(ioc, q, GFP_ATOMIC); + if (!icq) + return; + } + + rq->elv.icq = icq; + if (!blk_mq_sched_get_rq_priv(q, rq)) { + rq->rq_flags |= RQF_ELVPRIV; + get_io_context(icq->ioc); + return; + } + + rq->elv.icq = NULL; +} + +static void blk_mq_sched_assign_ioc(struct request_queue *q, + struct request *rq, struct bio *bio) +{ + struct io_context *ioc; + + ioc = rq_ioc(bio); + if (ioc) + __blk_mq_sched_assign_ioc(q, rq, ioc); +} + +struct request *blk_mq_sched_get_request(struct request_queue *q, + struct bio *bio, + unsigned int op, + struct blk_mq_alloc_data *data) +{ + struct elevator_queue *e = q->elevator; + struct blk_mq_hw_ctx *hctx; + struct blk_mq_ctx *ctx; + struct request *rq; + const bool is_flush = op & (REQ_PREFLUSH | REQ_FUA); + + blk_queue_enter_live(q); + ctx = blk_mq_get_ctx(q); + hctx = blk_mq_map_queue(q, ctx->cpu); + + blk_mq_set_alloc_data(data, q, 0, ctx, hctx); + + if (e) { + data->flags |= BLK_MQ_REQ_INTERNAL; + + /* + * Flush requests are special and go directly to the + * dispatch list. 
+ */ + if (!is_flush && e->type->ops.mq.get_request) { + rq = e->type->ops.mq.get_request(q, op, data); + if (rq) + rq->rq_flags |= RQF_QUEUED; + } else + rq = __blk_mq_alloc_request(data, op); + } else { + rq = __blk_mq_alloc_request(data, op); + data->hctx->tags->rqs[rq->tag] = rq; + } + + if (rq) { + if (!is_flush) { + rq->elv.icq = NULL; + if (e && e->type->icq_cache) + blk_mq_sched_assign_ioc(q, rq, bio); + } + data->hctx->queued++; + return rq; + } + + blk_queue_exit(q); + return NULL; +} + +void blk_mq_sched_put_request(struct request *rq) +{ + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + + if (rq->rq_flags & RQF_ELVPRIV) { + blk_mq_sched_put_rq_priv(rq->q, rq); + if (rq->elv.icq) { + put_io_context(rq->elv.icq->ioc); + rq->elv.icq = NULL; + } + } + + if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request) + e->type->ops.mq.put_request(rq); + else + blk_mq_finish_request(rq); +} + +void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx) +{ + struct elevator_queue *e = hctx->queue->elevator; + LIST_HEAD(rq_list); + + if (unlikely(blk_mq_hctx_stopped(hctx))) + return; + + hctx->run++; + + /* + * If we have previous entries on our dispatch list, grab them first for + * more fair dispatch. + */ + if (!list_empty_careful(&hctx->dispatch)) { + spin_lock(&hctx->lock); + if (!list_empty(&hctx->dispatch)) + list_splice_init(&hctx->dispatch, &rq_list); + spin_unlock(&hctx->lock); + } + + /* + * Only ask the scheduler for requests, if we didn't have residual + * requests from the dispatch list. This is to avoid the case where + * we only ever dispatch a fraction of the requests available because + * of low device queue depth. Once we pull requests out of the IO + * scheduler, we can no longer merge or sort them. So it's best to + * leave them there for as long as we can. Mark the hw queue as + * needing a restart in that case. 
+ */ + if (list_empty(&rq_list)) { + if (e && e->type->ops.mq.dispatch_requests) + e->type->ops.mq.dispatch_requests(hctx, &rq_list); + else + blk_mq_flush_busy_ctxs(hctx, &rq_list); + } else + blk_mq_sched_mark_restart(hctx); + + blk_mq_dispatch_rq_list(hctx, &rq_list); +} + +void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, + struct list_head *rq_list, + struct request *(*get_rq)(struct blk_mq_hw_ctx *)) +{ + do { + struct request *rq; + + rq = get_rq(hctx); + if (!rq) + break; + + list_add_tail(&rq->queuelist, rq_list); + } while (1); +} +EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch); + +bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio) +{ + struct request *rq; + int ret; + + ret = elv_merge(q, &rq, bio); + if (ret == ELEVATOR_BACK_MERGE) { + if (!blk_mq_sched_allow_merge(q, rq, bio)) + return false; + if (bio_attempt_back_merge(q, rq, bio)) { + if (!attempt_back_merge(q, rq)) + elv_merged_request(q, rq, ret); + return true; + } + } else if (ret == ELEVATOR_FRONT_MERGE) { + if (!blk_mq_sched_allow_merge(q, rq, bio)) + return false; + if (bio_attempt_front_merge(q, rq, bio)) { + if (!attempt_front_merge(q, rq)) + elv_merged_request(q, rq, ret); + return true; + } + } + + return false; +} +EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge); + +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +{ + struct elevator_queue *e = q->elevator; + + if (e->type->ops.mq.bio_merge) { + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + + blk_mq_put_ctx(ctx); + return e->type->ops.mq.bio_merge(hctx, bio); + } + + return false; +} + +bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq) +{ + return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq); +} +EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); + +void blk_mq_sched_request_inserted(struct request *rq) +{ + trace_block_rq_insert(rq->q, rq); +} 
+EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); + +bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq) +{ + if (rq->tag == -1) { + rq->rq_flags |= RQF_SORTED; + return false; + } + + /* + * If we already have a real request tag, send directly to + * the dispatch list. + */ + spin_lock(&hctx->lock); + list_add(&rq->queuelist, &hctx->dispatch); + spin_unlock(&hctx->lock); + return true; +} +EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert); + +static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, + struct blk_mq_hw_ctx *hctx, + unsigned int hctx_idx) +{ + if (hctx->sched_tags) { + blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); + blk_mq_free_rq_map(hctx->sched_tags); + hctx->sched_tags = NULL; + } +} + +int blk_mq_sched_setup(struct request_queue *q) +{ + struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_hw_ctx *hctx; + int ret, i; + + /* + * Default to 256, since we don't split into sync/async like the + * old code did. Additionally, this is a per-hw queue depth. + */ + q->nr_requests = 2 * BLKDEV_MAX_RQ; + + /* + * We're switching to using an IO scheduler, so setup the hctx + * scheduler tags and switch the request map from the regular + * tags to scheduler tags. First allocate what we need, so we + * can safely fail and fallback, if needed. 
+ */ + ret = 0; + queue_for_each_hw_ctx(q, hctx, i) { + hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0); + if (!hctx->sched_tags) { + ret = -ENOMEM; + break; + } + ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests); + if (ret) + break; + } + + /* + * If we failed, free what we did allocate + */ + if (ret) { + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx->sched_tags) + continue; + blk_mq_sched_free_tags(set, hctx, i); + } + + return ret; + } + + return 0; +} + +void blk_mq_sched_teardown(struct request_queue *q) +{ + struct blk_mq_tag_set *set = q->tag_set; + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_sched_free_tags(set, hctx, i); +} diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h new file mode 100644 index 000000000000..35c49e2e008a --- /dev/null +++ b/block/blk-mq-sched.h @@ -0,0 +1,170 @@ +#ifndef BLK_MQ_SCHED_H +#define BLK_MQ_SCHED_H + +#include "blk-mq.h" +#include "blk-mq-tag.h" + +int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size, + int (*init)(struct blk_mq_hw_ctx *), + void (*exit)(struct blk_mq_hw_ctx *)); + +void blk_mq_sched_free_hctx_data(struct request_queue *q, + void (*exit)(struct blk_mq_hw_ctx *)); + +struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data); +void blk_mq_sched_put_request(struct request *rq); + +void blk_mq_sched_request_inserted(struct request *rq); +bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq); +bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio); +bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio); +bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq); + +void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); +void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx, + struct list_head *rq_list, + struct request *(*get_rq)(struct 
blk_mq_hw_ctx *)); + +int blk_mq_sched_setup(struct request_queue *q); +void blk_mq_sched_teardown(struct request_queue *q); + +static inline bool +blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) +{ + struct elevator_queue *e = q->elevator; + + if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio)) + return false; + + return __blk_mq_sched_bio_merge(q, bio); +} + +static inline int blk_mq_sched_get_rq_priv(struct request_queue *q, + struct request *rq) +{ + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.get_rq_priv) + return e->type->ops.mq.get_rq_priv(q, rq); + + return 0; +} + +static inline void blk_mq_sched_put_rq_priv(struct request_queue *q, + struct request *rq) +{ + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.put_rq_priv) + e->type->ops.mq.put_rq_priv(q, rq); +} + +static inline void +blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue, + bool async) +{ + struct request_queue *q = rq->q; + struct elevator_queue *e = q->elevator; + struct blk_mq_ctx *ctx = rq->mq_ctx; + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + + if (e && e->type->ops.mq.insert_requests) { + LIST_HEAD(list); + + list_add(&rq->queuelist, &list); + e->type->ops.mq.insert_requests(hctx, &list, at_head); + } else { + spin_lock(&ctx->lock); + __blk_mq_insert_request(hctx, rq, at_head); + spin_unlock(&ctx->lock); + } + + if (run_queue) + blk_mq_run_hw_queue(hctx, async); +} + +static inline void +blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_ctx *ctx, + struct list_head *list, bool run_queue_async) +{ + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); + struct elevator_queue *e = hctx->queue->elevator; + + if (e && e->type->ops.mq.insert_requests) + e->type->ops.mq.insert_requests(hctx, list, false); + else + blk_mq_insert_requests(hctx, ctx, list); + + blk_mq_run_hw_queue(hctx, run_queue_async); +} + +static inline bool +blk_mq_sched_allow_merge(struct 
request_queue *q, struct request *rq, + struct bio *bio) +{ + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.mq.allow_merge) + return e->type->ops.mq.allow_merge(q, rq, bio); + + return true; +} + +static inline void +blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq) +{ + struct elevator_queue *e = hctx->queue->elevator; + + if (e && e->type->ops.mq.completed_request) + e->type->ops.