summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--block/blk-core.c41
-rw-r--r--block/blk-mq.c148
-rw-r--r--block/blk.h3
-rw-r--r--block/bounce.c3
-rw-r--r--block/ioctl.c37
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/buffer.c10
-rw-r--r--fs/ext4/page-io.c1
-rw-r--r--fs/nilfs2/segbuf.c12
-rw-r--r--include/linux/blk_types.h21
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/linux/swap.h1
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/block_io.c103
-rw-r--r--kernel/power/power.h9
-rw-r--r--kernel/power/swap.c159
-rw-r--r--kernel/sched/core.c5
-rw-r--r--mm/page_io.c2
20 files changed, 285 insertions, 291 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 2e5020f37d55..de474b5dee2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1522,7 +1522,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
* Caller must ensure !blk_queue_nomerges(q) beforehand.
*/
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
- unsigned int *request_count)
+ unsigned int *request_count,
+ struct request **same_queue_rq)
{
struct blk_plug *plug;
struct request *rq;
@@ -1542,8 +1543,16 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
list_for_each_entry_reverse(rq, plug_list, queuelist) {
int el_ret;
- if (rq->q == q)
+ if (rq->q == q) {
(*request_count)++;
+ /*
+ * Only blk-mq multiple hardware queues case checks the
+ * rq in the same queue, there should be only one such
+ * rq in a queue
+ **/
+ if (same_queue_rq)
+ *same_queue_rq = rq;
+ }
if (rq->q != q || !blk_rq_merge_ok(rq, bio))
continue;
@@ -1608,7 +1617,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
* any locks.
*/
if (!blk_queue_nomerges(q) &&
- blk_attempt_plug_merge(q, bio, &request_count))
+ blk_attempt_plug_merge(q, bio, &request_count, NULL))
return;
spin_lock_irq(q->queue_lock);
@@ -1716,8 +1725,6 @@ static void handle_bad_sector(struct bio *bio)
bio->bi_rw,
(unsigned long long)bio_end_sector(bio),
(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
-
- set_bit(BIO_EOF, &bio->bi_flags);
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -3032,21 +3039,20 @@ void blk_start_plug(struct blk_plug *plug)
{
struct task_struct *tsk = current;
+ /*
+ * If this is a nested plug, don't actually assign it.
+ */
+ if (tsk->plug)
+ return;
+
INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->mq_list);
INIT_LIST_HEAD(&plug->cb_list);
-
/*
- * If this is a nested plug, don't actually assign it. It will be
- * flushed on its own.
+ * Store ordering should not be needed here, since a potential
+ * preempt will imply a full memory barrier
*/
- if (!tsk->plug) {
- /*
- * Store ordering should not be needed here, since a potential
- * preempt will imply a full memory barrier
- */
- tsk->plug = plug;
- }
+ tsk->plug = plug;
}
EXPORT_SYMBOL(blk_start_plug);
@@ -3193,10 +3199,11 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
void blk_finish_plug(struct blk_plug *plug)
{
+ if (plug != current->plug)
+ return;
blk_flush_plug_list(plug, false);
- if (plug == current->plug)
- current->plug = NULL;
+ current->plug = NULL;
}
EXPORT_SYMBOL(blk_finish_plug);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ade8a2d1b0aa..c382a34fe5ac 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -89,7 +89,8 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
return -EBUSY;
ret = wait_event_interruptible(q->mq_freeze_wq,
- !q->mq_freeze_depth || blk_queue_dying(q));
+ !atomic_read(&q->mq_freeze_depth) ||
+ blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
if (ret)
@@ -112,13 +113,10 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
void blk_mq_freeze_queue_start(struct request_queue *q)
{
- bool freeze;
+ int freeze_depth;
- spin_lock_irq(q->queue_lock);
- freeze = !q->mq_freeze_depth++;
- spin_unlock_irq(q->queue_lock);
-
- if (freeze) {
+ freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
+ if (freeze_depth == 1) {
percpu_ref_kill(&q->mq_usage_counter);
blk_mq_run_hw_queues(q, false);
}
@@ -143,13 +141,11 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
void blk_mq_unfreeze_queue(struct request_queue *q)
{
- bool wake;
+ int freeze_depth;
- spin_lock_irq(q->queue_lock);
- wake = !--q->mq_freeze_depth;
- WARN_ON_ONCE(q->mq_freeze_depth < 0);
- spin_unlock_irq(q->queue_lock);
- if (wake) {
+ freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
+ WARN_ON_ONCE(freeze_depth < 0);
+ if (!freeze_depth) {
percpu_ref_reinit(&q->mq_usage_counter);
wake_up_all(&q->mq_freeze_wq);
}
@@ -1224,6 +1220,38 @@ static struct request *blk_mq_map_request(struct request_queue *q,
return rq;
}
+static int blk_mq_direct_issue_request(struct request *rq)
+{
+ int ret;
+ struct request_queue *q = rq->q;
+ struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
+ rq->mq_ctx->cpu);
+ struct blk_mq_queue_data bd = {
+ .rq = rq,
+ .list = NULL,
+ .last = 1
+ };
+
+ /*
+ * For OK queue, we are done. For error, kill it. Any other
+ * error (busy), just add it to our list as we previously
+ * would have done
+ */
+ ret = q->mq_ops->queue_rq(hctx, &bd);
+ if (ret == BLK_MQ_RQ_QUEUE_OK)
+ return 0;
+ else {
+ __blk_mq_requeue_request(rq);
+
+ if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+ rq->errors = -EIO;
+ blk_mq_end_request(rq, rq->errors);
+ return 0;
+ }
+ return -1;
+ }
+}
+
/*
* Multiple hardware queue variant. This will not use per-process plugs,
* but will attempt to bypass the hctx queueing if we can go straight to
@@ -1235,6 +1263,9 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
struct blk_map_ctx data;
struct request *rq;
+ unsigned int request_count = 0;
+ struct blk_plug *plug;
+ struct request *same_queue_rq = NULL;
blk_queue_bounce(q, &bio);
@@ -1243,6 +1274,10 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
return;
}
+ if (!is_flush_fua && !blk_queue_nomerges(q) &&
+ blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
+ return;
+
rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq))
return;
@@ -1253,38 +1288,42 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
goto run_queue;
}
+ plug = current->plug;
/*
* If the driver supports defer issued based on 'last', then
* queue it up like normal since we can potentially save some
* CPU this way.
*/
- if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
- struct blk_mq_queue_data bd = {
- .rq = rq,
- .list = NULL,
- .last = 1
- };
- int ret;
+ if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
+ !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
+ struct request *old_rq = NULL;
blk_mq_bio_to_request(rq, bio);
/*
- * For OK queue, we are done. For error, kill it. Any other
- * error (busy), just add it to our list as we previously
- * would have done
+ * we do limited pluging. If bio can be merged, do merge.
+ * Otherwise the existing request in the plug list will be
+ * issued. So the plug list will have one request at most
*/
- ret = q->mq_ops->queue_rq(data.hctx, &bd);
- if (ret == BLK_MQ_RQ_QUEUE_OK)
- goto done;
- else {
- __blk_mq_requeue_request(rq);
-
- if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
- rq->errors = -EIO;
- blk_mq_end_request(rq, rq->errors);
- goto done;
+ if (plug) {
+ /*
+ * The plug list might get flushed before this. If that
+ * happens, same_queue_rq is invalid and plug list is empty
+ **/
+ if (same_queue_rq && !list_empty(&plug->mq_list)) {
+ old_rq = same_queue_rq;
+ list_del_init(&old_rq->queuelist);
}
- }
+ list_add_tail(&rq->queuelist, &plug->mq_list);
+ } else /* is_sync */
+ old_rq = rq;
+ blk_mq_put_ctx(data.ctx);
+ if (!old_rq)
+ return;
+ if (!blk_mq_direct_issue_request(old_rq))
+ return;
+ blk_mq_insert_request(old_rq, false, true, true);
+ return;
}
if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1297,7 +1336,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
run_queue:
blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
}
-done:
blk_mq_put_ctx(data.ctx);
}
@@ -1309,16 +1347,11 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
{
const int is_sync = rw_is_sync(bio->bi_rw);
const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
- unsigned int use_plug, request_count = 0;
+ struct blk_plug *plug;
+ unsigned int request_count = 0;
struct blk_map_ctx data;
struct request *rq;
- /*
- * If we have multiple hardware queues, just go directly to
- * one of those for sync IO.
- */
- use_plug = !is_flush_fua && !is_sync;
-
blk_queue_bounce(q, &bio);
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
@@ -1326,8 +1359,8 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
return;
}
- if (use_plug && !blk_queue_nomerges(q) &&
- blk_attempt_plug_merge(q, bio, &request_count))
+ if (!is_flush_fua && !blk_queue_nomerges(q) &&
+ blk_attempt_plug_merge(q, bio, &request_count, NULL))
return;
rq = blk_mq_map_request(q, bio, &data);
@@ -1345,21 +1378,18 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
* utilize that to temporarily store requests until the task is
* either done or scheduled away.
*/
- if (use_plug) {
- struct blk_plug *plug = current->plug;
-
- if (plug) {
- blk_mq_bio_to_request(rq, bio);
- if (list_empty(&plug->mq_list))
- trace_block_plug(q);
- else if (request_count >= BLK_MAX_REQUEST_COUNT) {
- blk_flush_plug_list(plug, false);
- trace_block_plug(q);
- }
- list_add_tail(&rq->queuelist, &plug->mq_list);
- blk_mq_put_ctx(data.ctx);
- return;
+ plug = current->plug;
+ if (plug) {
+ blk_mq_bio_to_request(rq, bio);
+ if (list_empty(&plug->mq_list))
+ trace_block_plug(q);
+ else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+ blk_flush_plug_list(plug, false);
+ trace_block_plug(q);
}
+ list_add_tail(&rq->queuelist, &plug->mq_list);
+ blk_mq_put_ctx(data.ctx);
+ return;
}
if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -2047,7 +2077,7 @@ void blk_mq_free_queue(struct request_queue *q)
/* Basically redo blk_mq_init_queue with queue frozen */
static void blk_mq_queue_reinit(struct request_queue *q)
{
- WARN_ON_ONCE(!q->mq_freeze_depth);
+ WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
blk_mq_sysfs_unregister(q);
diff --git a/block/blk.h b/block/blk.h
index 4b48d55e588e..026d9594142b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -78,7 +78,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
struct bio *bio);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
- unsigned int *request_count);
+ unsigned int *request_count,
+ struct request **same_queue_rq);
void blk_account_io_start(struct request *req, bool new_io);
void blk_account_io_completion(struct request *req, unsigned int bytes);
diff --git a/block/bounce.c b/block/bounce.c
index ab21ba203d5c..4bac72579c1f 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -128,9 +128,6 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
struct bio_vec *bvec, *org_vec;
int i;
- if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
- set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
-
/*
* free up bounce indirect pages used
*/
diff --git a/block/ioctl.c b/block/ioctl.c
index 7d8befde2aca..8061eba42887 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -150,21 +150,48 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
}
}
-static int blkdev_reread_part(struct block_device *bdev)
+/*
+ * This is an exported API for the block driver, and will not
+ * acquire bd_mutex. This API should be used in case that
+ * caller has held bd_mutex already.
+ */
+int __blkdev_reread_part(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
- int res;
if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- if (!mutex_trylock(&bdev->bd_mutex))
- return -EBUSY;
- res = rescan_partitions(disk, bdev);
+
+ lockdep_assert_held(&bdev->bd_mutex);
+
+ return rescan_partitions(disk, bdev);
+}
+EXPORT_SYMBOL(__blkdev_reread_part);
+
+/*
+ * This is an exported API for the block driver, and will
+ * try to acquire bd_mutex. If bd_mutex has been held already
+ * in current context, please call __blkdev_reread_part().
+ *
+ * Make sure the held locks in current context aren't required
+ * in open()/close() handler and I/O path for avoiding ABBA deadlock:
+ * - bd_mutex is held before calling block driver's open/close
+ * handler
+ * - reading partition table may submit I/O to the block device
+ */
+int blkdev_reread_part(struct block_device *bdev)
+{
+ int res;
+
+ mutex_lock(&bdev->bd_mutex);
+ res = __blkdev_reread_part(bdev);
mutex_unlock(&bdev->bd_mutex);
+
return res;
}
+EXPORT_SYMBOL(blkdev_reread_part);
static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
uint64_t len, int secure)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2ef9a4b72d06..e08a926fe12c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3269,11 +3269,8 @@ static int write_dev_supers(struct btrfs_device *device,
*/
static void btrfs_end_empty_barrier(struct bio *bio, int err)
{
- if (err) {
- if (err == -EOPNOTSUPP)
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+ if (err)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
- }
if (bio->bi_private)
complete(bio->bi_private);
bio_put(bio);
@@ -3301,11 +3298,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
wait_for_completion(&device->flush_wait);
- if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
- printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
- rcu_str_deref(device->name));
- device->nobarriers = 1;
- } else if (!bio_flagged(bio, BIO_UPTODATE)) {
+ if (!bio_flagged(bio, BIO_UPTODATE)) {
ret = -EIO;
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 43af5a61ad25..1e155299abc0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2767,8 +2767,6 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
else
btrfsic_submit_bio(rw, bio);
- if (bio_flagged(bio, BIO_EOPNOTSUPP))
- ret = -EOPNOTSUPP;
bio_put(bio);
return ret;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index c7a5602d01ee..efd85e0e8660 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2938,10 +2938,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
{
struct buffer_head *bh = bio->bi_private;
- if (err == -EOPNOTSUPP) {
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- }
-
if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
set_bit(BH_Quiet, &bh->b_state);
@@ -3041,13 +3037,7 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
if (buffer_prio(bh))
rw |= REQ_PRIO;
- bio_get(bio);
submit_bio(rw, bio);
-
- if (bio_flagged(bio, BIO_EOPNOTSUPP))
- ret = -EOPNOTSUPP;
-
- bio_put(bio);
return ret;
}
EXPORT_SYMBOL_GPL(_submit_bh);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 5765f88b3904..c5d81e8d84c3 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -359,7 +359,6 @@ void ext4_io_submit(struct ext4_io_submit *io)
if (bio) {
bio_get(io->io_bio);
submit_bio(io->io_op, io->io_bio);
- BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
bio_put(io->io_bio);
}
io->io_bio = NULL;
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc3a9efdaab8..42468e5ab3e7 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -343,11 +343,6 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct nilfs_segment_buffer *segbuf = bio->bi_private;
- if (err == -EOPNOTSUPP) {
- set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- /* to be detected by nilfs_segbuf_submit_bio() */
- }
-
if (!uptodate)
atomic_inc(&segbuf->sb_err);
@@ -374,15 +369,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
bio->bi_end_io = nilfs_end_bio_write;
bio->bi_private = segbuf;
- bio_get(bio);
submit_bio(mode, bio);
segbuf->sb_nbio++;
- if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
- bio_put(bio);
- err = -EOPNOTSUPP;
- goto failed;
- }
- bio_put(bio);
wi->bio = NULL;
wi->rest_blocks -= wi->end - wi->start;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 93d2e7153816..3f4ded0b1a34 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -112,18 +112,15 @@ struct bio {
* bio flags
*/
#define BIO_UPTODATE 0 /* ok after I/O completion */
-#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */
-#define BIO_EOF 2 /* out-out-bounds error */
-#define BIO_SEG_VALID 3 /* bi_phys_segments valid */
-#define BIO_CLONED 4 /* doesn't own data */
-#define BIO_BOUNCED 5 /* bio is a bounce bio */
-#define BIO_USER_MAPPED 6 /* contains user pages */
-#define BIO_EOPNOTSUPP 7 /* not supported */
-#define BIO_NULL_MAPPED 8 /* contains invalid user pages */
-#define BIO_QUIET 9 /* Make BIO Quiet */
-#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */
-#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */
-#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */
+#define BIO_SEG_VALID 1 /* bi_phys_segments valid */
+#define BIO_CLONED 2 /* doesn't own data */
+#define BIO_BOUNCED 3 /* bio is a bounce bio */
+#define BIO_USER_MAPPED 4 /* contains user pages */
+#define BIO_NULL_MAPPED 5 /* contains invalid user pages */
+#define BIO_QUIET 6 /* Make BIO Quiet */
+#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */
+#define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */
/*
* Flags starting here get preserved by bio_reset() - this includes
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2da818a48097..bc917956a6d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -444,7 +444,7 @@ struct request_queue {
struct mutex sysfs_lock;
int bypass_depth;
- int mq_freeze_depth;
+ atomic_t mq_freeze_depth;
#if defined(CONFIG_BLK_DEV_BSG)
bsg_job_fn *bsg_job_fn;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 35ec87e490b1..1ef63900243c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2279,6 +2279,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
void *holder);
extern void blkdev_put(struct block_device *bdev, fmode_t mode);
+extern int __blkdev_reread_part(struct block_device *bdev);
+extern int blkdev_reread_part(struct block_device *bdev);
+
#ifdef CONFIG_SYSFS
extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
extern void bd_unlink_disk_holder(struct block_device *bdev,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index cee108cbe2d5..38874729dc5f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err);
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
void (*end_write_func)(struct bio *, int));
extern int swap_set_page_dirty(struct page *page);
-extern void end_swap_bio_read(struct bio *bio, int err);
int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
unsigned long nr_pages, sector_t start_block);
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bff11ef..cb880a14cc39 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,8 +7,7 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
-obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
- block_io.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
deleted file mode 100644
index 9a58bc258810..000000000000
--- a/kernel/power/block_io.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * This file provides functions for block I/O operations on swap/file.
- *
- * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
- * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
- *
- * This file is released under the GPLv2.
- */
-
-#include <linux/bio.h>
-#include <linux/kernel.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-
-#include "power.h"
-
-/**
- * submit - submit BIO request.
- * @rw: READ or WRITE.
- * @off physical offset of page.
- * @page: page we're reading or writing.
- * @bio_chain: list of pending biod (for async reading)
- *
- * Straight from the textbook - allocate and initialize the bio.
- * If we're reading, make sure the page is marked as dirty.
- * Then submit it and, if @bio_chain == NULL, wait.
- */
-static int submit(int rw, struct block_device *bdev, sector_t sector,
- struct page *page, struct bio **bio_chain)
-{
- const int bio_rw = rw | REQ_SYNC;
- struct bio *bio;
-
- bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
- bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
- bio->bi_end_io = end_swap_bio_read;
-
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
- (unsigned long long)sector);
- bio_put(bio);
- return -EFAULT;
- }
-
- lock_page(page);
- bio_get(bio);
-
- if (bio_chain == NULL) {
- submit_bio(bio_rw, bio);
- wait_on_page_locked(page);
- if (rw == READ)
- bio_set_pages_dirty(bio);
- bio_put(bio);
- } else {
- if (rw == READ)
- get_page(page); /* These pages are freed later */
- bio->bi_private = *bio_chain;
- *bio_chain = bio;
- submit_bio(bio_rw, bio);
- }
- return 0;
-}
-
-int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
- virt_to_page(addr), bio_chain);
-}
-
-int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
- virt_to_page(addr), bio_chain);
-}
-
-int hib_wait_on_bio_chain(struct bio **bio_chain)
-{
- struct bio *bio;
- struct bio *next_bio;
- int ret = 0;
-
- if (bio_chain == NULL)
- return 0;
-
- bio = *bio_chain;
- if (bio == NULL)
- return 0;
- while (bio) {
- struct page *page;
-
- next_bio = bio->bi_private;
- page = bio->bi_io_vec[0].bv_page;
- wait_on_page_locked(page);
- if (!PageUptodate(page) || PageError(page))
- ret = -EIO;
- put_page(page);
- bio_put(bio);
- bio = next_bio;
- }
- *bio_chain = NULL;
- return ret;
-}
diff --git a/kernel/power/power.h b/kernel/power/power.h
index ce9b8328a689..caadb566e82b 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -163,15 +163,6 @@ extern void swsusp_close(fmode_t);
extern int swsusp_unmark(void);
#endif
-/* kernel/power/block_io.c */
-extern struct block_device *hib_resume_bdev;
-
-extern int hib_bio_read_page(pgoff_t page_off, void *addr,
- struct bio **bio_chain);
-extern int hib_bio_write_page(pgoff_t page_off, void *addr,
- struct bio **bio_chain);
-extern int hib_wait_on_bio_chain(struct bio **bio_chain);
-
struct timeval;
/* kernel/power/swsusp.c */
extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 570aff817543..2f30ca91e4fa 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -212,7 +212,84 @@ int swsusp_swap_in_use(void)
*/
static unsigned short root_swap = 0xffff;
-struct block_device *hib_resume_bdev;
+static struct block_device *hib_resume_bdev;
+
+struct hib_bio_batch {
+ atomic_t count;
+ wait_queue_head_t wait;
+ int error;
+};
+
+static void hib_init_batch(struct hib_bio_batch *hb)
+{
+ atomic_set(&hb->count, 0);
+ init_waitqueue_head(&hb->wait);
+ hb->error = 0;
+}
+
+static void hib_end_io(struct bio *bio, int error)
+{
+ struct hib_bio_batch *hb = bio->bi_private;
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ if (!uptodate || error) {
+ printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
+ imajor(bio->bi_bdev->bd_inode),
+ iminor(bio->bi_bdev->bd_inode),
+ (unsigned long long)bio->bi_iter.bi_sector);
+
+ if (!error)
+ error = -EIO;
+ }
+
+ if (bio_data_dir(bio) == WRITE)
+ put_page(page);
+
+ if (error && !hb->error)
+ hb->error = error;
+ if (atomic_dec_and_test(&hb->count))
+ wake_up(&hb->wait);
+
+ bio_put(bio);
+}
+
+static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
+ struct hib_bio_batch *hb)
+{
+ struct page *page = virt_to_page(addr);
+ struct bio *bio;
+ int error = 0;
+
+ bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+ bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
+ bio->bi_bdev = hib_resume_bdev;
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
+ (unsigned long long)bio->bi_iter.bi_sector);
+ bio_put(bio);
+ return -EFAULT;
+ }
+
+ if (hb) {
+ bio->bi_end_io = hib_end_io;
+ bio->bi_private = hb;
+ atomic_inc(&hb->count);
+ submit_bio(rw, bio);
+ } else {
+ error = submit_bio_wait(rw, bio);
+ bio_put(bio);
+ }
+
+ return error;
+}
+
+static int hib_wait_io(struct hib_bio_batch *hb)
+{
+ wait_event(hb->wait, atomic_read(&hb->count) == 0);
+ return hb->error;
+}
/*
* Saving part
@@ -222,7 +299,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
{
int error;
- hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+ hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL);
if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
@@ -231,7 +308,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
swsusp_header->flags = flags;
if (flags & SF_CRC32_MODE)
swsusp_header->crc32 = handle->crc32;
- error = hib_bio_write_page(swsusp_resume_block,
+ error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
swsusp_header, NULL);
} else {
printk(KERN_ERR "PM: Swap header not found!\n");
@@ -271,10 +348,10 @@ static int swsusp_swap_check(void)
* write_page - Write one page to given swap location.
* @buf: Address we're writing.
* @offset: Offset of the swap page we're writing to.
- * @bio_chain: Link the next write BIO here
+ * @hb: bio completion batch
*/
-static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
+static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
{
void *src;
int ret;
@@ -282,13 +359,13 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
if (!offset)
return -ENOSPC;
- if (bio_chain) {
+ if (hb) {
src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
__GFP_NORETRY);
if (src) {
copy_page(src, buf);
} else {
- ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ ret = hib_wait_io(hb); /* Free pages */
if (ret)
return ret;
src = (void *)__get_free_page(__GFP_WAIT |
@@ -298,14 +375,14 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
copy_page(src, buf);
} else {
WARN_ON_ONCE(1);
- bio_chain = NULL; /* Go synchronous */
+ hb = NULL; /* Go synchronous */
src = buf;
}
}
} else {
src = buf;
}
- return hib_bio_write_page(offset, src, bio_chain);
+ return hib_submit_io(WRITE_SYNC, offset, src, hb);
}
static void release_swap_writer(struct swap_map_handle *handle)
@@ -348,7 +425,7 @@ err_close:
}
static int swap_write_page(struct swap_map_handle *handle, void *buf,
- struct bio **bio_chain)
+ struct hib_bio_batch *hb)
{
int error = 0;
sector_t offset;
@@ -356,7 +433,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
if (!handle->cur)
return -EINVAL;
offset = alloc_swapdev_block(root_swap);
- error = write_page(buf, offset, bio_chain);
+ error = write_page(buf, offset, hb);
if (error)
return error;
handle->cur->entries[handle->k++] = offset;
@@ -365,15 +442,15 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
- error = write_page(handle->cur, handle->cur_swap, bio_chain);
+ error = write_page(handle->cur, handle->cur_swap, hb);
if (error)
goto out;
clear_page(handle->cur);
handle->cur_swap = offset;
handle->k = 0;
- if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
- error = hib_wait_on_bio_chain(bio_chain);
+ if (hb && low_free_pages() <= handle->reqd_free_pages) {
+ error = hib_wait_io(hb);
if (error)
goto out;
/*
@@ -445,23 +522,24 @@ static int save_image(struct swap_map_handle *handle,
int ret;
int nr_pages;
int err2;
- struct bio *bio;
+ struct hib_bio_batch hb;
ktime_t start