summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-05 14:27:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-05 14:27:02 -0700
commit3526dd0c7832f1011a0477cc6d903662bae05ea8 (patch)
tree22fbac64eb40a0b29bfa4c029695f39b2f591e62 /block
parentdd972f924df6bdbc0ab185a38d5d2361dbc26311 (diff)
parentbc6d65e6dc89c3b7ff78e4ad797117c122ffde8e (diff)
Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: "It's a pretty quiet round this time, which is nice. This contains: - series from Bart, cleaning up the way we set/test/clear atomic queue flags. - series from Bart, fixing races between gendisk and queue registration and removal. - set of bcache fixes and improvements from various folks, by way of Michael Lyle. - set of lightnvm updates from Matias, most of it being the 1.2 to 2.0 transition. - removal of unused DIO flags from Nikolay. - blk-mq/sbitmap memory ordering fixes from Omar. - divide-by-zero fix for BFQ from Paolo. - minor documentation patches from Randy. - timeout fix from Tejun. - Alpha "can't write a char atomically" fix from Mikulas. - set of NVMe fixes by way of Keith. - bsg and bsg-lib improvements from Christoph. - a few sed-opal fixes from Jonas. - cdrom check-disk-change deadlock fix from Maurizio. - various little fixes, comment fixes, etc from various folks" * tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits) blk-mq: Directly schedule q->timeout_work when aborting a request blktrace: fix comment in blktrace_api.h lightnvm: remove function name in strings lightnvm: pblk: remove some unnecessary NULL checks lightnvm: pblk: don't recover unwritten lines lightnvm: pblk: implement 2.0 support lightnvm: pblk: implement get log report chunk lightnvm: pblk: rename ppaf* to addrf* lightnvm: pblk: check for supported version lightnvm: implement get log report chunk helpers lightnvm: make address conversions depend on generic device lightnvm: add support for 2.0 address format lightnvm: normalize geometry nomenclature lightnvm: complete geo structure with maxoc* lightnvm: add shorten OCSSD version in geo lightnvm: add minor version to generic geometry lightnvm: simplify geometry structure lightnvm: pblk: refactor init/exit sequences lightnvm: Avoid validation of default op value lightnvm: centralize permission check for lightnvm ioctl ...
Diffstat (limited to 'block')
-rw-r--r--block/bfq-iosched.c25
-rw-r--r--block/bfq-iosched.h2
-rw-r--r--block/bio.c4
-rw-r--r--block/blk-cgroup.c78
-rw-r--r--block/blk-core.c250
-rw-r--r--block/blk-mq-debugfs.c150
-rw-r--r--block/blk-mq-pci.c6
-rw-r--r--block/blk-mq.c20
-rw-r--r--block/blk-settings.c6
-rw-r--r--block/blk-stat.c6
-rw-r--r--block/blk-sysfs.c29
-rw-r--r--block/blk-timeout.c8
-rw-r--r--block/blk-zoned.c4
-rw-r--r--block/blk.h69
-rw-r--r--block/bsg-lib.c165
-rw-r--r--block/bsg.c262
-rw-r--r--block/sed-opal.c37
17 files changed, 695 insertions, 426 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index aeca22d91101..f0ecd98509d8 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -201,7 +201,20 @@ static struct kmem_cache *bfq_pool;
/* Target observation time interval for a peak-rate update (ns) */
#define BFQ_RATE_REF_INTERVAL NSEC_PER_SEC
-/* Shift used for peak rate fixed precision calculations. */
+/*
+ * Shift used for peak-rate fixed precision calculations.
+ * With
+ * - the current shift: 16 positions
+ * - the current type used to store rate: u32
+ * - the current unit of measure for rate: [sectors/usec], or, more precisely,
+ * [(sectors/usec) / 2^BFQ_RATE_SHIFT] to take into account the shift,
+ * the range of rates that can be stored is
+ * [1 / 2^BFQ_RATE_SHIFT, 2^(32 - BFQ_RATE_SHIFT)] sectors/usec =
+ * [1 / 2^16, 2^16] sectors/usec = [15e-6, 65536] sectors/usec =
+ * [15, 65G] sectors/sec
+ * Which, assuming a sector size of 512B, corresponds to a range of
+ * [7.5K, 33T] B/sec
+ */
#define BFQ_RATE_SHIFT 16
/*
@@ -2637,6 +2650,16 @@ static void bfq_update_rate_reset(struct bfq_data *bfqd, struct request *rq)
rate /= divisor; /* smoothing constant alpha = 1/divisor */
bfqd->peak_rate += rate;
+
+ /*
+ * For a very slow device, bfqd->peak_rate can reach 0 (see
+ * the minimum representable values reported in the comments
+ * on BFQ_RATE_SHIFT). Push to 1 if this happens, to avoid
+ * divisions by zero where bfqd->peak_rate is used as a
+ * divisor.
+ */
+ bfqd->peak_rate = max_t(u32, 1, bfqd->peak_rate);
+
update_thr_responsiveness_params(bfqd);
reset_computation:
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 350c39ae2896..ae2f3dadec44 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -499,7 +499,7 @@ struct bfq_data {
u64 delta_from_first;
/*
* Current estimate of the device peak rate, measured in
- * [BFQ_RATE_SHIFT * sectors/usec]. The left-shift by
+ * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
* BFQ_RATE_SHIFT is performed to increase precision in
* fixed-point calculations.
*/
diff --git a/block/bio.c b/block/bio.c
index e1708db48258..53e0f0a1ed94 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -43,9 +43,9 @@
* break badly! cannot be bigger than what you can fit into an
* unsigned short
*/
-#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
+#define BV(x, n) { .nr_vecs = x, .name = "biovec-"#n }
static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
- BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
+ BV(1, 1), BV(4, 4), BV(16, 16), BV(64, 64), BV(128, 128), BV(BIO_MAX_PAGES, max),
};
#undef BV
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c2033a232a44..1c16694ae145 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -307,11 +307,28 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
}
}
+static void blkg_pd_offline(struct blkcg_gq *blkg)
+{
+ int i;
+
+ lockdep_assert_held(blkg->q->queue_lock);
+ lockdep_assert_held(&blkg->blkcg->lock);
+
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+
+ if (blkg->pd[i] && !blkg->pd[i]->offline &&
+ pol->pd_offline_fn) {
+ pol->pd_offline_fn(blkg->pd[i]);
+ blkg->pd[i]->offline = true;
+ }
+ }
+}
+
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
struct blkcg_gq *parent = blkg->parent;
- int i;
lockdep_assert_held(blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
@@ -320,13 +337,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
WARN_ON_ONCE(list_empty(&blkg->q_node));
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
-
- if (blkg->pd[i] && pol->pd_offline_fn)
- pol->pd_offline_fn(blkg->pd[i]);
- }
-
if (parent) {
blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -369,6 +379,7 @@ static void blkg_destroy_all(struct request_queue *q)
struct blkcg *blkcg = blkg->blkcg;
spin_lock(&blkcg->lock);
+ blkg_pd_offline(blkg);
blkg_destroy(blkg);
spin_unlock(&blkcg->lock);
}
@@ -995,25 +1006,25 @@ static struct cftype blkcg_legacy_files[] = {
* @css: css of interest
*
* This function is called when @css is about to go away and responsible
- * for shooting down all blkgs associated with @css. blkgs should be
- * removed while holding both q and blkcg locks. As blkcg lock is nested
- * inside q lock, this function performs reverse double lock dancing.
+ * for offlining all blkgs pd and killing all wbs associated with @css.
+ * blkgs pd offline should be done while holding both q and blkcg locks.
+ * As blkcg lock is nested inside q lock, this function performs reverse
+ * double lock dancing.
*
* This is the blkcg counterpart of ioc_release_fn().
*/
static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
+ struct blkcg_gq *blkg;
spin_lock_irq(&blkcg->lock);
- while (!hlist_empty(&blkcg->blkg_list)) {
- struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
- struct blkcg_gq, blkcg_node);
+ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
struct request_queue *q = blkg->q;
if (spin_trylock(q->queue_lock)) {
- blkg_destroy(blkg);
+ blkg_pd_offline(blkg);
spin_unlock(q->queue_lock);
} else {
spin_unlock_irq(&blkcg->lock);
@@ -1027,11 +1038,43 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
wb_blkcg_offline(blkcg);
}
+/**
+ * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * This function is called when blkcg css is about to free and responsible for
+ * destroying all blkgs associated with @blkcg.
+ * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
+ * is nested inside q lock, this function performs reverse double lock dancing.
+ */
+static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
+{
+ spin_lock_irq(&blkcg->lock);
+ while (!hlist_empty(&blkcg->blkg_list)) {
+ struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
+ struct blkcg_gq,
+ blkcg_node);
+ struct request_queue *q = blkg->q;
+
+ if (spin_trylock(q->queue_lock)) {
+ blkg_destroy(blkg);
+ spin_unlock(q->queue_lock);
+ } else {
+ spin_unlock_irq(&blkcg->lock);
+ cpu_relax();
+ spin_lock_irq(&blkcg->lock);
+ }
+ }
+ spin_unlock_irq(&blkcg->lock);
+}
+
static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
int i;
+ blkcg_destroy_all_blkgs(blkcg);
+
mutex_lock(&blkcg_pol_mutex);
list_del(&blkcg->all_blkcgs_node);
@@ -1371,8 +1414,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
spin_lock(&blkg->blkcg->lock);
if (blkg->pd[pol->plid]) {
- if (pol->pd_offline_fn)
+ if (!blkg->pd[pol->plid]->offline &&
+ pol->pd_offline_fn) {
pol->pd_offline_fn(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid]->offline = true;
+ }
pol->pd_free_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 6d82c4f7fadd..abcb8684ba67 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -71,6 +71,78 @@ struct kmem_cache *blk_requestq_cachep;
*/
static struct workqueue_struct *kblockd_workqueue;
+/**
+ * blk_queue_flag_set - atomically set a queue flag
+ * @flag: flag to be set
+ * @q: request queue
+ */
+void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_set(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL(blk_queue_flag_set);
+
+/**
+ * blk_queue_flag_clear - atomically clear a queue flag
+ * @flag: flag to be cleared
+ * @q: request queue
+ */
+void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_clear(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL(blk_queue_flag_clear);
+
+/**
+ * blk_queue_flag_test_and_set - atomically test and set a queue flag
+ * @flag: flag to be set
+ * @q: request queue
+ *
+ * Returns the previous value of @flag - 0 if the flag was not set and 1 if
+ * the flag was already set.
+ */
+bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ res = queue_flag_test_and_set(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
+
+/**
+ * blk_queue_flag_test_and_clear - atomically test and clear a queue flag
+ * @flag: flag to be cleared
+ * @q: request queue
+ *
+ * Returns the previous value of @flag - 0 if the flag was not set and 1 if
+ * the flag was set.
+ */
+bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
+{
+ unsigned long flags;
+ bool res;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ res = queue_flag_test_and_clear(flag, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
+
static void blk_clear_congested(struct request_list *rl, int sync)
{
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -361,25 +433,14 @@ EXPORT_SYMBOL(blk_sync_queue);
*/
int blk_set_preempt_only(struct request_queue *q)
{
- unsigned long flags;
- int res;
-
- spin_lock_irqsave(q->queue_lock, flags);
- res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-
- return res;
+ return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
}
EXPORT_SYMBOL_GPL(blk_set_preempt_only);
void blk_clear_preempt_only(struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
+ blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
wake_up_all(&q->mq_freeze_wq);
- spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
@@ -629,9 +690,7 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
void blk_set_queue_dying(struct request_queue *q)
{
- spin_lock_irq(q->queue_lock);
- queue_flag_set(QUEUE_FLAG_DYING, q);
- spin_unlock_irq(q->queue_lock);
+ blk_queue_flag_set(QUEUE_FLAG_DYING, q);
/*
* When queue DYING flag is set, we need to block new req
@@ -719,6 +778,37 @@ void blk_cleanup_queue(struct request_queue *q)
del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
blk_sync_queue(q);
+ /*
+ * I/O scheduler exit is only safe after the sysfs scheduler attribute
+ * has been removed.
+ */
+ WARN_ON_ONCE(q->kobj.state_in_sysfs);
+
+ /*
+ * Since the I/O scheduler exit code may access cgroup information,
+ * perform I/O scheduler exit before disassociating from the block
+ * cgroup controller.
+ */
+ if (q->elevator) {
+ ioc_clear_queue(q);
+ elevator_exit(q, q->elevator);
+ q->elevator = NULL;
+ }
+
+ /*
+ * Remove all references to @q from the block cgroup controller before
+ * restoring @q->queue_lock to avoid that restoring this pointer causes
+ * e.g. blkcg_print_blkgs() to crash.
+ */
+ blkcg_exit_queue(q);
+
+ /*
+ * Since the cgroup code may dereference the @q->backing_dev_info
+ * pointer, only decrease its reference count after having removed the
+ * association with the block cgroup controller.
+ */
+ bdi_put(q->backing_dev_info);
+
if (q->mq_ops)
blk_mq_free_queue(q);
percpu_ref_exit(&q->q_usage_counter);
@@ -810,7 +900,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl)
struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
{
- return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
+ return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
}
EXPORT_SYMBOL(blk_alloc_queue);
@@ -827,7 +917,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
bool success = false;
int ret;
- rcu_read_lock_sched();
+ rcu_read_lock();
if (percpu_ref_tryget_live(&q->q_usage_counter)) {
/*
* The code that sets the PREEMPT_ONLY flag is
@@ -840,7 +930,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
percpu_ref_put(&q->q_usage_counter);
}
}
- rcu_read_unlock_sched();
+ rcu_read_unlock();
if (success)
return 0;
@@ -888,7 +978,21 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
kblockd_schedule_work(&q->timeout_work);
}
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+/**
+ * blk_alloc_queue_node - allocate a request queue
+ * @gfp_mask: memory allocation flags
+ * @node_id: NUMA node to allocate memory from
+ * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
+ * serialize calls to the legacy .request_fn() callback. Ignored for
+ * blk-mq request queues.
+ *
+ * Note: pass the queue lock as the third argument to this function instead of
+ * setting the queue lock pointer explicitly to avoid triggering a sporadic
+ * crash in the blkcg code. This function namely calls blkcg_init_queue() and
+ * the queue lock pointer must be set before blkcg_init_queue() is called.
+ */
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
+ spinlock_t *lock)
{
struct request_queue *q;
@@ -939,11 +1043,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
mutex_init(&q->sysfs_lock);
spin_lock_init(&q->__queue_lock);
- /*
- * By default initialize queue_lock to internal lock and driver can
- * override it later if need be.
- */
- q->queue_lock = &q->__queue_lock;
+ if (!q->mq_ops)
+ q->queue_lock = lock ? : &q->__queue_lock;
/*
* A queue starts its life with bypass turned on to avoid
@@ -952,7 +1053,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
* registered by blk_register_queue().
*/
q->bypass_depth = 1;
- __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
+ queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
init_waitqueue_head(&q->mq_freeze_wq);
@@ -1030,13 +1131,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{
struct request_queue *q;
- q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+ q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
if (!q)
return NULL;
q->request_fn = rfn;
- if (lock)
- q->queue_lock = lock;
if (blk_init_allocated_queue(q) < 0) {
blk_cleanup_queue(q);
return NULL;
@@ -2023,7 +2122,7 @@ out_unlock:
return BLK_QC_T_NONE;
}
-static void handle_bad_sector(struct bio *bio)
+static void handle_bad_sector(struct bio *bio, sector_t maxsector)
{
char b[BDEVNAME_SIZE];
@@ -2031,7 +2130,7 @@ static void handle_bad_sector(struct bio *bio)
printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
bio_devname(bio, b), bio->bi_opf,
(unsigned long long)bio_end_sector(bio),
- (long long)get_capacity(bio->bi_disk));
+ (long long)maxsector);
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -2093,67 +2192,58 @@ static noinline int should_fail_bio(struct bio *bio)
ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
/*
+ * Check whether this bio extends beyond the end of the device or partition.
+ * This may well happen - the kernel calls bread() without checking the size of
+ * the device, e.g., when mounting a file system.
+ */
+static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
+{
+ unsigned int nr_sectors = bio_sectors(bio);
+
+ if (nr_sectors && maxsector &&
+ (nr_sectors > maxsector ||
+ bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
+ handle_bad_sector(bio, maxsector);
+ return -EIO;
+ }
+ return 0;
+}
+
+/*
* Remap block n of partition p to block n+start(p) of the disk.
*/
static inline int blk_partition_remap(struct bio *bio)
{
struct hd_struct *p;
- int ret = 0;
+ int ret = -EIO;
rcu_read_lock();
p = __disk_get_part(bio->bi_disk, bio->bi_partno);
- if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) ||
- bio_check_ro(bio, p))) {
- ret = -EIO;
+ if (unlikely(!p))
+ goto out;
+ if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
+ goto out;
+ if (unlikely(bio_check_ro(bio, p)))
goto out;
- }
/*
* Zone reset does not include bi_size so bio_sectors() is always 0.
* Include a test for the reset op code and perform the remap if needed.
*/
- if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)
- goto out;
-
- bio->bi_iter.bi_sector += p->start_sect;
- bio->bi_partno = 0;
- trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
- bio->bi_iter.bi_sector - p->start_sect);
-
+ if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+ if (bio_check_eod(bio, part_nr_sects_read(p)))
+ goto out;
+ bio->bi_iter.bi_sector += p->start_sect;
+ bio->bi_partno = 0;
+ trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
+ bio->bi_iter.bi_sector - p->start_sect);
+ }
+ ret = 0;
out:
rcu_read_unlock();
return ret;
}
-/*
- * Check whether this bio extends beyond the end of the device.
- */
-static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
-{
- sector_t maxsector;
-
- if (!nr_sectors)
- return 0;
-
- /* Test device or partition size, when known. */
- maxsector = get_capacity(bio->bi_disk);
- if (maxsector) {
- sector_t sector = bio->bi_iter.bi_sector;
-
- if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
- /*
- * This may well happen - the kernel calls bread()
- * without checking the size of the device, e.g., when
- * mounting a device.
- */
- handle_bad_sector(bio);
- return 1;
- }
- }
-
- return 0;
-}
-
static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
@@ -2164,9 +2254,6 @@ generic_make_request_checks(struct bio *bio)
might_sleep();
- if (bio_check_eod(bio, nr_sectors))
- goto end_io;
-
q = bio->bi_disk->queue;
if (unlikely(!q)) {
printk(KERN_ERR
@@ -2186,17 +2273,16 @@ generic_make_request_checks(struct bio *bio)
if (should_fail_bio(bio))
goto end_io;
- if (!bio->bi_partno) {
- if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+ if (bio->bi_partno) {
+ if (unlikely(blk_partition_remap(bio)))
goto end_io;
} else {
- if (blk_partition_remap(bio))
+ if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+ goto end_io;
+ if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
goto end_io;
}
- if (bio_check_eod(bio, nr_sectors))
- goto end_io;
-
/*
* Filter flush bio's early so that make_request based
* drivers without flush support don't have to worry
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 21cbc1f071c6..58b3b79cbe83 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -24,6 +24,64 @@
#include "blk-mq-debugfs.h"
#include "blk-mq-tag.h"
+static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
+{
+ if (stat->nr_samples) {
+ seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
+ stat->nr_samples, stat->mean, stat->min, stat->max);
+ } else {
+ seq_puts(m, "samples=0");
+ }
+}
+
+static int queue_poll_stat_show(void *data, struct seq_file *m)
+{
+ struct request_queue *q = data;
+ int bucket;
+
+ for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
+ seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
+ print_stat(m, &q->poll_stat[2*bucket]);
+ seq_puts(m, "\n");
+
+ seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
+ print_stat(m, &q->poll_stat[2*bucket+1]);
+ seq_puts(m, "\n");
+ }
+ return 0;
+}
+
+static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
+ __acquires(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_lock_irq(&q->requeue_lock);
+ return seq_list_start(&q->requeue_list, *pos);
+}
+
+static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct request_queue *q = m->private;
+
+ return seq_list_next(v, &q->requeue_list, pos);
+}
+
+static void queue_requeue_list_stop(struct seq_file *m, void *v)
+ __releases(&q->requeue_lock)
+{
+ struct request_queue *q = m->private;
+
+ spin_unlock_irq(&q->requeue_lock);
+}
+
+static const struct seq_operations queue_requeue_list_seq_ops = {
+ .start = queue_requeue_list_start,
+ .next = queue_requeue_list_next,
+ .stop = queue_requeue_list_stop,
+ .show = blk_mq_debugfs_rq_show,
+};
+
static int blk_flags_show(struct seq_file *m, const unsigned long flags,
const char *const *flag_name, int flag_name_count)
{
@@ -125,16 +183,6 @@ inval:
return count;
}
-static void print_stat(struct seq_file *m, struct blk_rq_stat *stat)
-{
- if (stat->nr_samples) {
- seq_printf(m, "samples=%d, mean=%lld, min=%llu, max=%llu",
- stat->nr_samples, stat->mean, stat->min, stat->max);
- } else {
- seq_puts(m, "samples=0");
- }
-}
-
static int queue_write_hint_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
@@ -158,23 +206,30 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
return count;
}
-static int queue_poll_stat_show(void *data, struct seq_file *m)
+static int queue_zone_wlock_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
- int bucket;
+ unsigned int i;
- for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
- seq_printf(m, "read (%d Bytes): ", 1 << (9+bucket));
- print_stat(m, &q->poll_stat[2*bucket]);
- seq_puts(m, "\n");
+ if (!q->seq_zones_wlock)
+ return 0;
+
+ for (i = 0; i < blk_queue_nr_zones(q); i++)
+ if (test_bit(i, q->seq_zones_wlock))
+ seq_printf(m, "%u\n", i);
- seq_printf(m, "write (%d Bytes): ", 1 << (9+bucket));
- print_stat(m, &q->poll_stat[2*bucket+1]);
- seq_puts(m, "\n");
- }
return 0;
}
+static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
+ { "poll_stat", 0400, queue_poll_stat_show },
+ { "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
+ { "state", 0600, queue_state_show, queue_state_write },
+ { "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
+ { "zone_wlock", 0400, queue_zone_wlock_show, NULL },
+ { },
+};
+
#define HCTX_STATE_NAME(name) [BLK_MQ_S_##name] = #name
static const char *const hctx_state_name[] = {
HCTX_STATE_NAME(STOPPED),
@@ -295,6 +350,20 @@ static const char *const rqf_name[] = {
};
#undef RQF_NAME
+static const char *const blk_mq_rq_state_name_array[] = {
+ [MQ_RQ_IDLE] = "idle",
+ [MQ_RQ_IN_FLIGHT] = "in_flight",
+ [MQ_RQ_COMPLETE] = "complete",
+};
+
+static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
+{
+ if (WARN_ON_ONCE((unsigned int)rq_state >
+ ARRAY_SIZE(blk_mq_rq_state_name_array)))
+ return "(?)";
+ return blk_mq_rq_state_name_array[rq_state];
+}
+
int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
{
const struct blk_mq_ops *const mq_ops = rq->q->mq_ops;
@@ -311,7 +380,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
seq_puts(m, ", .rq_flags=");
blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name,
ARRAY_SIZE(rqf_name));
- seq_printf(m, ", complete=%d", blk_rq_is_complete(rq));
+ seq_printf(m, ", .state=%s", blk_mq_rq_state_name(blk_mq_rq_state(rq)));
seq_printf(m, ", .tag=%d, .internal_tag=%d", rq->tag,
rq->internal_tag);
if (mq_ops->show_rq)
@@ -327,37 +396,6 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v)
}
EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show);
-static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos)
- __acquires(&q->requeue_lock)
-{
- struct request_queue *q = m->private;
-
- spin_lock_irq(&q->requeue_lock);
- return seq_list_start(&q->requeue_list, *pos);
-}
-
-static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
- struct request_queue *q = m->private;
-
- return seq_list_next(v, &q->requeue_list, pos);
-}
-
-static void queue_requeue_list_stop(struct seq_file *m, void *v)
- __releases(&q->requeue_lock)
-{
- struct request_queue *q = m->private;
-
- spin_unlock_irq(&q->requeue_lock);
-}
-
-static const struct seq_operations queue_requeue_list_seq_ops = {
- .start = queue_requeue_list_start,
- .next = queue_requeue_list_next,
- .stop = queue_requeue_list_stop,
- .show = blk_mq_debugfs_rq_show,
-};
-
static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos)
__acquires(&hctx->lock)
{
@@ -747,14 +785,6 @@ static const struct file_operations blk_mq_debugfs_fops = {
.release = blk_mq_debugfs_release,
};
-static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
- {"poll_stat", 0400, queue_poll_stat_show},
- {"requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops},
- {"state", 0600, queue_state_show, queue_state_write},
- {"write_hints", 0600, queue_write_hint_show, queue_write_hint_store},
- {},
-};
-
static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
{"state", 0400, hctx_state_show},
{"flags", 0400, hctx_flags_show},
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index 76944e3271bf..e233996bb76f 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -21,6 +21,7 @@
* blk_mq_pci_map_queues - provide a default queue mapping for PCI device
* @set: tagset to provide the mapping for
* @pdev: PCI device associated with @set.
+ * @offset: Offset to use for the pci irq vector
*
* This function assumes the PCI device @pdev has at least as many available
* interrupt vectors as @set has queues. It will then query the vector
@@ -28,13 +29,14 @@
* that maps a queue to the CPUs that have irq affinity for the corresponding
* vector.
*/
-int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
+int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev,
+ int offset)
{
const struct cpumask *mask;
unsigned int queue, cpu;
for (queue = 0; queue < set->nr_hw_queues; queue++) {
- mask = pci_irq_get_affinity(pdev, queue);
+ mask = pci_irq_get_affinity(pdev, queue + offset);
if (!mask)
goto fallback;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 16e83e6df404..f5c7dbcb954f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -194,11 +194,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
*/
void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_set(QUEUE_FLAG_QUIESCED, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
@@ -239,11 +235,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
*/
void blk_mq_unquiesce_queue(struct request_queue *q)
{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
- spin_unlock_irqrestore(q->queue_lock, flags);
+ blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
/* dispatch requests which are inserted during quiescing */
blk_mq_run_hw_queues(q, true);
@@ -986,9 +978,9 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
struct blk_mq_hw_ctx *hctx = flush_data->hctx;
struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
- sbitmap_clear_bit(sb, bitnr);
spin_lock(&ctx->lock);
list_splice_tail_init(&ctx->rq_list, flush_data->list);
+ sbitmap_clear_bit(sb, bitnr);
spin_unlock(&ctx->lock);
return true;
}
@@ -2556,7 +2548,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
struct request_queue *uninit_q, *q;
- uninit_q = blk_alloc_queue_n