diff options
Diffstat (limited to 'block')
53 files changed, 1532 insertions, 1193 deletions
diff --git a/block/Makefile b/block/Makefile index 1a43750f4b01..206b96e9387f 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,8 +8,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ - genhd.o partition-generic.o ioprio.o \ - badblocks.o partitions/ blk-rq-qos.o + genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index f0ff6654af28..68882b9b8f11 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -642,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; + /* + * Get extra reference to prevent bfqq from being freed in + * next possible expire or deactivate. + */ + bfqq->ref++; + /* If bfqq is empty, then bfq_bfqq_expire also invokes * bfq_del_bfqq_busy, thereby removing bfqq and its entity * from data structures related to current group. Otherwise we @@ -652,12 +658,6 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); - /* - * get extra reference to prevent bfqq from being freed in - * next possible deactivate - */ - bfqq->ref++; - if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) @@ -677,7 +677,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->in_service_queue && !bfqd->rq_in_driver) bfq_schedule_dispatch(bfqd); - /* release extra ref taken above */ + /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } @@ -714,10 +714,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, if (entity->sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, 0); - bfq_log_bfqq(bfqd, async_bfqq, - "bic_change_group: %p %d", - async_bfqq, async_bfqq->ref); - bfq_put_queue(async_bfqq); + bfq_release_process_ref(bfqd, async_bfqq); } } @@ -818,39 +815,53 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st) /** * bfq_reparent_leaf_entity - move leaf entity to the root_group. * @bfqd: the device data structure with the root group. - * @entity: the entity to move. + * @entity: the entity to move, if entity is a leaf; or the parent entity + * of an active leaf entity to move, if entity is not a leaf. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, - struct bfq_entity *entity) + struct bfq_entity *entity, + int ioprio_class) { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_queue *bfqq; + struct bfq_entity *child_entity = entity; + while (child_entity->my_sched_data) { /* leaf not reached yet */ + struct bfq_sched_data *child_sd = child_entity->my_sched_data; + struct bfq_service_tree *child_st = child_sd->service_tree + + ioprio_class; + struct rb_root *child_active = &child_st->active; + + child_entity = bfq_entity_of(rb_first(child_active)); + + if (!child_entity) + child_entity = child_sd->in_service_entity; + } + + bfqq = bfq_entity_to_bfqq(child_entity); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** - * bfq_reparent_active_entities - move to the root group all active - * entities. + * bfq_reparent_active_queues - move to the root group all active queues. * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. - * @st: the service tree with the entities. + * @st: the service tree to start the search from. */ -static void bfq_reparent_active_entities(struct bfq_data *bfqd, - struct bfq_group *bfqg, - struct bfq_service_tree *st) +static void bfq_reparent_active_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg, + struct bfq_service_tree *st, + int ioprio_class) { struct rb_root *active = &st->active; - struct bfq_entity *entity = NULL; - - if (!RB_EMPTY_ROOT(&st->active)) - entity = bfq_entity_of(rb_first(active)); + struct bfq_entity *entity; - for (; entity ; entity = bfq_entity_of(rb_first(active))) - bfq_reparent_leaf_entity(bfqd, entity); + while ((entity = bfq_entity_of(rb_first(active)))) + bfq_reparent_leaf_entity(bfqd, entity, ioprio_class); if (bfqg->sched_data.in_service_entity) bfq_reparent_leaf_entity(bfqd, - bfqg->sched_data.in_service_entity); + bfqg->sched_data.in_service_entity, + ioprio_class); } /** @@ -883,13 +894,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) st = bfqg->sched_data.service_tree + i; /* - * The idle tree may still contain bfq_queues belonging - * to exited task because they never migrated to a different - * cgroup from the one being destroyed now. - */ - bfq_flush_idle_tree(st); - - /* * It may happen that some queues are still active * (busy) upon group destruction (if the corresponding * processes have been forced to terminate). We move @@ -901,7 +905,20 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) * There is no need to put the sync queues, as the * scheduler has taken no reference. */ - bfq_reparent_active_entities(bfqd, bfqg, st); + bfq_reparent_active_queues(bfqd, bfqg, st, i); + + /* + * The idle tree may still contain bfq_queues + * belonging to exited task because they never + * migrated to a different cgroup from the one being + * destroyed now. In addition, even + * bfq_reparent_active_queues() may happen to add some + * entities to the idle tree. It happens if, in some + * of the calls to bfq_bfqq_move() performed by + * bfq_reparent_active_queues(), the queue to move is + * empty and gets expired. + */ + bfq_flush_idle_tree(st); } __bfq_deactivate_entity(entity, false); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 8c436abfaf14..78ba57efd16b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2716,8 +2716,6 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) } } - -static void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* @@ -6215,20 +6213,28 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) return bfqq; } -static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) +static void +bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - struct bfq_data *bfqd = bfqq->bfqd; enum bfqq_expiration reason; unsigned long flags; spin_lock_irqsave(&bfqd->lock, flags); - bfq_clear_bfqq_wait_request(bfqq); + /* + * Considering that bfqq may be in race, we should firstly check + * whether bfqq is in service before doing something on it. If + * the bfqq in race is not in service, it has already been expired + * through __bfq_bfqq_expire func and its wait_request flags has + * been cleared in __bfq_bfqd_reset_in_service func. + */ if (bfqq != bfqd->in_service_queue) { spin_unlock_irqrestore(&bfqd->lock, flags); return; } + bfq_clear_bfqq_wait_request(bfqq); + if (bfq_bfqq_budget_timeout(bfqq)) /* * Also here the queue can be safely expired @@ -6273,7 +6279,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) * early. */ if (bfqq) - bfq_idle_slice_timer_body(bfqq); + bfq_idle_slice_timer_body(bfqd, bfqq); return HRTIMER_NORESTART; } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index d1233af9c684..cd224aaf9f52 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -955,6 +955,7 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); +void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_schedule_dispatch(struct bfq_data *bfqd); void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); diff --git a/block/bio.c b/block/bio.c index 94d697217887..21cbaa6a1c20 100644 --- a/block/bio.c +++ b/block/bio.c @@ -17,6 +17,7 @@ #include <linux/cgroup.h> #include <linux/blk-cgroup.h> #include <linux/highmem.h> +#include <linux/sched/sysctl.h> #include <trace/events/block.h> #include "blk.h" @@ -588,6 +589,49 @@ void bio_truncate(struct bio *bio, unsigned new_size) } /** + * guard_bio_eod - truncate a BIO to fit the block device + * @bio: bio to truncate + * + * This allows us to do IO even on the odd last sectors of a device, even if the + * block size is some multiple of the physical sector size. + * + * We'll just truncate the bio to the size of the device, and clear the end of + * the buffer head manually. Truly out-of-range accesses will turn into actual + * I/O errors, this only handles the "we need to be able to do I/O at the final + * sector" case. + */ +void guard_bio_eod(struct bio *bio) +{ + sector_t maxsector; + struct hd_struct *part; + + rcu_read_lock(); + part = __disk_get_part(bio->bi_disk, bio->bi_partno); + if (part) + maxsector = part_nr_sects_read(part); + else + maxsector = get_capacity(bio->bi_disk); + rcu_read_unlock(); + + if (!maxsector) + return; + + /* + * If the *whole* IO is past the end of the device, + * let it through, and the IO layer will turn it into + * an EIO. + */ + if (unlikely(bio->bi_iter.bi_sector >= maxsector)) + return; + + maxsector -= bio->bi_iter.bi_sector; + if (likely((bio->bi_iter.bi_size >> 9) <= maxsector)) + return; + + bio_truncate(bio, maxsector << 9); +} + +/** * bio_put - release a reference to a bio * @bio: bio to release reference to * @@ -679,6 +723,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) } EXPORT_SYMBOL(bio_clone_fast); +const char *bio_devname(struct bio *bio, char *buf) +{ + return disk_name(bio->bi_disk, bio->bi_partno, buf); +} +EXPORT_SYMBOL(bio_devname); + static inline bool page_is_mergeable(const struct bio_vec *bv, struct page *page, unsigned int len, unsigned int off, bool *same_page) @@ -730,7 +780,7 @@ static bool bio_try_merge_pc_page(struct request_queue *q, struct bio *bio, * * This should only be used by passthrough bios. */ -static int __bio_add_pc_page(struct request_queue *q, struct bio *bio, +int __bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, bool *same_page) { @@ -1019,12 +1069,21 @@ static void submit_bio_wait_endio(struct bio *bio) int submit_bio_wait(struct bio *bio) { DECLARE_COMPLETION_ONSTACK_MAP(done, bio->bi_disk->lockdep_map); + unsigned long hang_check; bio->bi_private = &done; bio->bi_end_io = submit_bio_wait_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); - wait_for_completion_io(&done); + + /* Prevent hang_check timer from firing at us during very long I/O */ + hang_check = sysctl_hung_task_timeout_secs; + if (hang_check) + while (!wait_for_completion_io_timeout(&done, + hang_check * (HZ/2))) + ; + else + wait_for_completion_io(&done); return blk_status_to_errno(bio->bi_status); } @@ -1135,90 +1194,6 @@ void bio_list_copy_data(struct bio *dst, struct bio *src) } EXPORT_SYMBOL(bio_list_copy_data); -struct bio_map_data { - int is_our_pages; - struct iov_iter iter; - struct iovec iov[]; -}; - -static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, - gfp_t gfp_mask) -{ - struct bio_map_data *bmd; - if (data->nr_segs > UIO_MAXIOV) - return NULL; - - bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask); - if (!bmd) - return NULL; - memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); - bmd->iter = *data; - bmd->iter.iov = bmd->iov; - return bmd; -} - -/** - * bio_copy_from_iter - copy all pages from iov_iter to bio - * @bio: The &struct bio which describes the I/O as destination - * @iter: iov_iter as source - * - * Copy all pages from iov_iter to bio. - * Returns 0 on success, or error on failure. - */ -static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) -{ - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - ssize_t ret; - - ret = copy_page_from_iter(bvec->bv_page, - bvec->bv_offset, - bvec->bv_len, - iter); - - if (!iov_iter_count(iter)) - break; - - if (ret < bvec->bv_len) - return -EFAULT; - } - - return 0; -} - -/** - * bio_copy_to_iter - copy all pages from bio to iov_iter - * @bio: The &struct bio which describes the I/O as source - * @iter: iov_iter as destination - * - * Copy all pages from bio to iov_iter. - * Returns 0 on success, or error on failure. - */ -static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) -{ - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - ssize_t ret; - - ret = copy_page_to_iter(bvec->bv_page, - bvec->bv_offset, - bvec->bv_len, - &iter); - - if (!iov_iter_count(&iter)) - break; - - if (ret < bvec->bv_len) - return -EFAULT; - } - - return 0; -} - void bio_free_pages(struct bio *bio) { struct bio_vec *bvec; @@ -1229,430 +1204,6 @@ void bio_free_pages(struct bio *bio) } EXPORT_SYMBOL(bio_free_pages); -/** - * bio_uncopy_user - finish previously mapped bio - * @bio: bio being terminated - * - * Free pages allocated from bio_copy_user_iov() and write back data - * to user space in case of a read. - */ -int bio_uncopy_user(struct bio *bio) -{ - struct bio_map_data *bmd = bio->bi_private; - int ret = 0; - - if (!bio_flagged(bio, BIO_NULL_MAPPED)) { - /* - * if we're in a workqueue, the request is orphaned, so - * don't copy into a random user address space, just free - * and return -EINTR so user space doesn't expect any data. - */ - if (!current->mm) - ret = -EINTR; - else if (bio_data_dir(bio) == READ) - ret = bio_copy_to_iter(bio, bmd->iter); - if (bmd->is_our_pages) - bio_free_pages(bio); - } - kfree(bmd); - bio_put(bio); - return ret; -} - -/** - * bio_copy_user_iov - copy user data to bio - * @q: destination block queue - * @map_data: pointer to the rq_map_data holding pages (if necessary) - * @iter: iovec iterator - * @gfp_mask: memory allocation flags - * - * Prepares and returns a bio for indirect user io, bouncing data - * to/from kernel pages as necessary. Must be paired with - * call bio_uncopy_user() on io completion. - */ -struct bio *bio_copy_user_iov(struct request_queue *q, - struct rq_map_data *map_data, - struct iov_iter *iter, - gfp_t gfp_mask) -{ - struct bio_map_data *bmd; - struct page *page; - struct bio *bio; - int i = 0, ret; - int nr_pages; - unsigned int len = iter->count; - unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; - - bmd = bio_alloc_map_data(iter, gfp_mask); - if (!bmd) - return ERR_PTR(-ENOMEM); - - /* - * We need to do a deep copy of the iov_iter including the iovecs. - * The caller provided iov might point to an on-stack or otherwise - * shortlived one. - */ - bmd->is_our_pages = map_data ? 0 : 1; - - nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); - if (nr_pages > BIO_MAX_PAGES) - nr_pages = BIO_MAX_PAGES; - - ret = -ENOMEM; - bio = bio_kmalloc(gfp_mask, nr_pages); - if (!bio) - goto out_bmd; - - ret = 0; - - if (map_data) { - nr_pages = 1 << map_data->page_order; - i = map_data->offset / PAGE_SIZE; - } - while (len) { - unsigned int bytes = PAGE_SIZE; - - bytes -= offset; - - if (bytes > len) - bytes = len; - - if (map_data) { - if (i == map_data->nr_entries * nr_pages) { - ret = -ENOMEM; - break; - } - - page = map_data->pages[i / nr_pages]; - page += (i % nr_pages); - - i++; - } else { - page = alloc_page(q->bounce_gfp | gfp_mask); - if (!page) { - ret = -ENOMEM; - break; - } - } - - if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) { - if (!map_data) - __free_page(page); - break; - } - - len -= bytes; - offset = 0; - } - - if (ret) - goto cleanup; - - if (map_data) - map_data->offset += bio->bi_iter.bi_size; - - /* - * success - */ - if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) || - (map_data && map_data->from_user)) { - ret = bio_copy_from_iter(bio, iter); - if (ret) - goto cleanup; - } else { - if (bmd->is_our_pages) - zero_fill_bio(bio); - iov_iter_advance(iter, bio->bi_iter.bi_size); - } - - bio->bi_private = bmd; - if (map_data && map_data->null_mapped) - bio_set_flag(bio, BIO_NULL_MAPPED); - return bio; -cleanup: - if (!map_data) - bio_free_pages(bio); - bio_put(bio); -out_bmd: - kfree(bmd); - return ERR_PTR(ret); -} - -/** - * bio_map_user_iov - map user iovec into bio - * @q: the struct request_queue for the bio - * @iter: iovec iterator - * @gfp_mask: memory allocation flags - * - * Map the user space address into a bio suitable for io to a block - * device. Returns an error pointer in case of error. - */ -struct bio *bio_map_user_iov(struct request_queue *q, - struct iov_iter *iter, - gfp_t gfp_mask) -{ - int j; - struct bio *bio; - int ret; - - if (!iov_iter_count(iter)) - return ERR_PTR(-EINVAL); - - bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); - if (!bio) - return ERR_PTR(-ENOMEM); - - while (iov_iter_count(iter)) { - struct page **pages; - ssize_t bytes; - size_t offs, added = 0; - int npages; - - bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); - if (unlikely(bytes <= 0)) { - ret = bytes ? bytes : -EFAULT; - goto out_unmap; - } - - npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - - if (unlikely(offs & queue_dma_alignment(q))) { - ret = -EINVAL; - j = 0; - } else { - for (j = 0; j < npages; j++) { - struct page *page = pages[j]; - unsigned int n = PAGE_SIZE - offs; - bool same_page = false; - - if (n > bytes) - n = bytes; - - if (!__bio_add_pc_page(q, bio, page, n, offs, - &same_page)) { - if (same_page) - put_page(page); - break; - } - - added += n; - bytes -= n; - offs = 0; - } - iov_iter_advance(iter, added); - } - /* - * release the pages we didn't map into the bio, if any - */ - while (j < npages) - put_page(pages[j++]); - kvfree(pages); - /* couldn't stuff something into bio? */ - if (bytes) - break; - } - - bio_set_flag(bio, BIO_USER_MAPPED); - - /* - * subtle -- if bio_map_user_iov() ended up bouncing a bio, - * it would normally disappear when its bi_end_io is run. - * however, we need it for the unmap, so grab an extra - * reference to it - */ - bio_get(bio); - return bio; - - out_unmap: - bio_release_pages(bio, false); - bio_put(bio); - return ERR_PTR(ret); -} - -/** - * bio_unmap_user - unmap a bio - * @bio: the bio being unmapped |