From d11a3998985b351aaab6bbdc23bc884bd5e815c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 9 Feb 2019 15:40:24 -0700 Subject: block: kill QUEUE_FLAG_FLUSH_NQ We have various helpers for setting/clearing this flag, and also a helper to check if the queue supports queueable flushes or not. But nobody uses them anymore, kill it with fire. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 338604dff7d0..24ccab51085f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -592,7 +592,6 @@ struct request_queue { #define QUEUE_FLAG_POLL 19 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 20 /* Write back caching */ #define QUEUE_FLAG_FUA 21 /* device supports FUA writes */ -#define QUEUE_FLAG_FLUSH_NQ 22 /* flush not queueuable */ #define QUEUE_FLAG_DAX 23 /* device supports DAX */ #define QUEUE_FLAG_STATS 24 /* track IO start and completion times */ #define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */ @@ -1069,7 +1068,6 @@ extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); /* @@ -1446,11 +1444,6 @@ static inline unsigned int block_size(struct block_device *bdev) return bdev->bd_block_size; } -static inline bool queue_flush_queueable(struct request_queue *q) -{ - return !test_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags); -} - typedef struct {struct page *v;} Sector; unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); -- cgit v1.2.3 From eca7abf31abba2acac445ec6a1d3f94cf0cab918 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 9 Feb 2019 15:42:07 -0700 Subject: block: queue flag cleanup We have QUEUE_FLAG_DEFAULT defined, but it's not used anymore since the legacy IO stack is gone. Kill it. Sanitize the queue flags in general, they use spaces (for some reason), and the space is pretty sparse. With the flags renumbered, we can more clearly see how many we have available. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 58 +++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24ccab51085f..3603270cb82d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -572,37 +572,33 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -#define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ -#define QUEUE_FLAG_DYING 2 /* queue being torn down */ -#define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ -#define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ -#define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */ -#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 10 /* do disk/partitions IO accounting */ -#define QUEUE_FLAG_DISCARD 11 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 12 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 13 /* Contributes to random pool */ -#define QUEUE_FLAG_SECERASE 14 /* supports secure erase */ -#define QUEUE_FLAG_SAME_FORCE 15 /* force complete on same CPU */ -#define QUEUE_FLAG_DEAD 16 /* queue tear-down finished */ -#define QUEUE_FLAG_INIT_DONE 17 /* queue is initialized */ -#define QUEUE_FLAG_NO_SG_MERGE 18 /* don't attempt to merge SG segments*/ -#define QUEUE_FLAG_POLL 19 /* IO polling enabled if set */ -#define QUEUE_FLAG_WC 20 /* Write back caching */ -#define QUEUE_FLAG_FUA 21 /* device supports FUA writes */ -#define QUEUE_FLAG_DAX 23 /* device supports DAX */ -#define QUEUE_FLAG_STATS 24 /* track IO start and completion times */ -#define QUEUE_FLAG_POLL_STATS 25 /* collecting stats for hybrid polling */ -#define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ -#define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ -#define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ -#define QUEUE_FLAG_PCI_P2PDMA 29 /* device supports PCI p2p requests */ - -#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_ADD_RANDOM)) +#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ +#define QUEUE_FLAG_DYING 1 /* queue being torn down */ +#define QUEUE_FLAG_BIDI 2 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ +#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ +#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ +#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ +#define QUEUE_FLAG_DISCARD 8 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ +#define QUEUE_FLAG_SECERASE 11 /* supports secure erase */ +#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ +#define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ +#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ +#define QUEUE_FLAG_NO_SG_MERGE 15 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ +#define QUEUE_FLAG_WC 17 /* Write back caching */ +#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ +#define QUEUE_FLAG_DAX 19 /* device supports DAX */ +#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ +#define QUEUE_FLAG_POLL_STATS 21 /* collecting stats for hybrid polling */ +#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ +#define QUEUE_FLAG_SCSI_PASSTHROUGH 23 /* queue supports SCSI commands */ +#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ +#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP)) -- cgit v1.2.3 From 8a2ee44a371c8cbef587ea609908c3cbf1645231 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Feb 2019 19:13:07 +0800 Subject: btrfs: look at bi_size for repair decisions bio_readpage_error currently uses bi_vcnt to decide if it is worth retrying an I/O. But the vector count is mostly an implementation artifact - it really should figure out if there is more than a single sector worth retrying. Use bi_size for that and shift by PAGE_SHIFT. This really should be blocks/sectors, but given that btrfs doesn't support a sector size different from the PAGE_SIZE using the page size keeps the changes to a minimum. Reviewed-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7380b094dcca..72b4f7be2106 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -263,12 +263,6 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) bv->bv_len = iter.bi_bvec_done; } -static inline unsigned bio_pages_all(struct bio *bio) -{ - WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); - return bio->bi_vcnt; -} - static inline struct bio_vec *bio_first_bvec_all(struct bio *bio) { WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)); -- cgit v1.2.3 From 19d62f6d00972f957c94aba0975c14490cfed385 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:09 +0800 Subject: block: remove bvec_iter_rewind() Commit 7759eb23fd980 ("block: remove bio_rewind_iter()") removes bio_rewind_iter(), then no one uses bvec_iter_rewind() any more, so remove it. Reviewed-by: Omar Sandoval Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bvec.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 02c73c6aa805..ba0ae40e77c9 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -92,30 +92,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } -static inline bool bvec_iter_rewind(const struct bio_vec *bv, - struct bvec_iter *iter, - unsigned int bytes) -{ - while (bytes) { - unsigned len = min(bytes, iter->bi_bvec_done); - - if (iter->bi_bvec_done == 0) { - if (WARN_ONCE(iter->bi_idx == 0, - "Attempted to rewind iter beyond " - "bvec's boundaries\n")) { - return false; - } - iter->bi_idx--; - iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len; - continue; - } - bytes -= len; - iter->bi_size += len; - iter->bi_bvec_done -= len; - } - return true; -} - #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ -- cgit v1.2.3 From 3d75ca0adef4280650c6690a0c4702a74a6f3c95 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:10 +0800 Subject: block: introduce multi-page bvec helpers This patch introduces helpers of 'mp_bvec_iter_*' for multi-page bvec support. The introduced helpers treate one bvec as real multi-page segment, which may include more than one pages. The existed helpers of bvec_iter_* are interfaces for supporting current bvec iterator which is thought as single-page by drivers, fs, dm and etc. These introduced helpers will build single-page bvec in flight, so this way won't break current bio/bvec users, which needn't any change. Follows some multi-page bvec background: - bvecs stored in bio->bi_io_vec is always multi-page style - bvec(struct bio_vec) represents one physically contiguous I/O buffer, now the buffer may include more than one page after multi-page bvec is supported, and all these pages represented by one bvec is physically contiguous. Before multi-page bvec support, at most one page is included in one bvec, we call it single-page bvec. - .bv_page of the bvec points to the 1st page in the multi-page bvec - .bv_offset of the bvec is the offset of the buffer in the bvec The effect on the current drivers/filesystem/dm/bcache/...: - almost everyone supposes that one bvec only includes one single page, so we keep the sp interface not changed, for example, bio_for_each_segment() still returns single-page bvec - bio_for_each_segment_all() will return single-page bvec too - during iterating, iterator variable(struct bvec_iter) is always updated in multi-page bvec style, and bvec_iter_advance() is kept not changed - returned(copied) single-page bvec is built in flight by bvec helpers from the stored multi-page bvec Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bvec.h | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ba0ae40e77c9..0ae729b1c9fe 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -23,6 +23,7 @@ #include #include #include +#include /* * was unsigned short, but we might as well be ready for > 64kB I/O pages @@ -50,16 +51,39 @@ struct bvec_iter { */ #define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) -#define bvec_iter_page(bvec, iter) \ +/* multi-page (mp_bvec) helpers */ +#define mp_bvec_iter_page(bvec, iter) \ (__bvec_iter_bvec((bvec), (iter))->bv_page) -#define bvec_iter_len(bvec, iter) \ +#define mp_bvec_iter_len(bvec, iter) \ min((iter).bi_size, \ __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) -#define bvec_iter_offset(bvec, iter) \ +#define mp_bvec_iter_offset(bvec, iter) \ (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) +#define mp_bvec_iter_page_idx(bvec, iter) \ + (mp_bvec_iter_offset((bvec), (iter)) / PAGE_SIZE) + +#define mp_bvec_iter_bvec(bvec, iter) \ +((struct bio_vec) { \ + .bv_page = mp_bvec_iter_page((bvec), (iter)), \ + .bv_len = mp_bvec_iter_len((bvec), (iter)), \ + .bv_offset = mp_bvec_iter_offset((bvec), (iter)), \ +}) + +/* For building single-page bvec in flight */ + #define bvec_iter_offset(bvec, iter) \ + (mp_bvec_iter_offset((bvec), (iter)) % PAGE_SIZE) + +#define bvec_iter_len(bvec, iter) \ + min_t(unsigned, mp_bvec_iter_len((bvec), (iter)), \ + PAGE_SIZE - bvec_iter_offset((bvec), (iter))) + +#define bvec_iter_page(bvec, iter) \ + nth_page(mp_bvec_iter_page((bvec), (iter)), \ + mp_bvec_iter_page_idx((bvec), (iter))) + #define bvec_iter_bvec(bvec, iter) \ ((struct bio_vec) { \ .bv_page = bvec_iter_page((bvec), (iter)), \ -- cgit v1.2.3 From d18d91740ad22e9d7998884c4d80523d0ba95ddf Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:11 +0800 Subject: block: introduce bio_for_each_bvec() and rq_for_each_bvec() bio_for_each_bvec() is used for iterating over multi-page bvec for bio split & merge code. rq_for_each_bvec() can be used for drivers which may handle the multi-page bvec directly, so far loop is one perfect use case. Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 10 ++++++++++ include/linux/blkdev.h | 4 ++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 72b4f7be2106..7ef8a7505c0a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -156,6 +156,16 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) +#define __bio_for_each_bvec(bvl, bio, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ + bio_advance_iter((bio), &(iter), (bvl).bv_len)) + +/* iterate over multi-page bvec */ +#define bio_for_each_bvec(bvl, bio, iter) \ + __bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter) + #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) static inline unsigned bio_segments(struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3603270cb82d..b6292d469ea4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -792,6 +792,10 @@ struct req_iterator { __rq_for_each_bio(_iter.bio, _rq) \ bio_for_each_segment(bvl, _iter.bio, _iter.iter) +#define rq_for_each_bvec(bvl, _rq, _iter) \ + __rq_for_each_bio(_iter.bio, _rq) \ + bio_for_each_bvec(bvl, _iter.bio, _iter.iter) + #define rq_iter_last(bvec, _iter) \ (_iter.bio->bi_next == NULL && \ bio_iter_last(bvec, _iter.iter)) -- cgit v1.2.3 From 45a3fb95298b326ab8175f2bd97bd8666017b692 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:14 +0800 Subject: block: introduce mp_bvec_last_segment() BTRFS and guard_bio_eod() need to get the last singlepage segment from one multipage bvec, so introduce this helper to make them happy. Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bvec.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 0ae729b1c9fe..21f76bad7be2 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -131,4 +131,26 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, .bi_bvec_done = 0, \ } +/* + * Get the last single-page segment from the multi-page bvec and store it + * in @seg + */ +static inline void mp_bvec_last_segment(const struct bio_vec *bvec, + struct bio_vec *seg) +{ + unsigned total = bvec->bv_offset + bvec->bv_len; + unsigned last_page = (total - 1) / PAGE_SIZE; + + seg->bv_page = nth_page(bvec->bv_page, last_page); + + /* the whole segment is inside the last page */ + if (bvec->bv_offset >= last_page * PAGE_SIZE) { + seg->bv_offset = bvec->bv_offset % PAGE_SIZE; + seg->bv_len = bvec->bv_len; + } else { + seg->bv_offset = 0; + seg->bv_len = total - last_page * PAGE_SIZE; + } +} + #endif /* __LINUX_BVEC_ITER_H */ -- cgit v1.2.3 From 6dc4f100c175dd0511ae8674786e7c9006cdfbfa Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:19 +0800 Subject: block: allow bio_for_each_segment_all() to iterate over multi-page bvec This patch introduces one extra iterator variable to bio_for_each_segment_all(), then we can allow bio_for_each_segment_all() to iterate over multi-page bvec. Given it is just one mechannical & simple change on all bio_for_each_segment_all() users, this patch does tree-wide change in one single patch, so that we can avoid to use a temporary helper for this conversion. Reviewed-by: Omar Sandoval Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++-- include/linux/bvec.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 7ef8a7505c0a..089370eb84d9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -128,12 +128,19 @@ static inline bool bio_full(struct bio *bio) return bio->bi_vcnt >= bio->bi_max_vecs; } +#define mp_bvec_for_each_segment(bv, bvl, i, iter_all) \ + for (bv = bvec_init_iter_all(&iter_all); \ + (iter_all.done < (bvl)->bv_len) && \ + (mp_bvec_next_segment((bvl), &iter_all), 1); \ + iter_all.done += bv->bv_len, i += 1) + /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ -#define bio_for_each_segment_all(bvl, bio, i) \ - for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++) +#define bio_for_each_segment_all(bvl, bio, i, iter_all) \ + for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++) \ + mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all) static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, unsigned bytes) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 21f76bad7be2..30a57b68d017 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -45,6 +45,12 @@ struct bvec_iter { current bvec */ }; +struct bvec_iter_all { + struct bio_vec bv; + int idx; + unsigned done; +}; + /* * various member access, note that bio_data should of course not be used * on highmem page vectors @@ -131,6 +137,30 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, .bi_bvec_done = 0, \ } +static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all) +{ + iter_all->bv.bv_page = NULL; + iter_all->done = 0; + + return &iter_all->bv; +} + +static inline void mp_bvec_next_segment(const struct bio_vec *bvec, + struct bvec_iter_all *iter_all) +{ + struct bio_vec *bv = &iter_all->bv; + + if (bv->bv_page) { + bv->bv_page = nth_page(bv->bv_page, 1); + bv->bv_offset = 0; + } else { + bv->bv_page = bvec->bv_page; + bv->bv_offset = bvec->bv_offset; + } + bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, + bvec->bv_len - iter_all->done); +} + /* * Get the last single-page segment from the multi-page bvec and store it * in @seg -- cgit v1.2.3 From 07173c3ec276cbb18dc0e0687d37d310e98a1480 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:20 +0800 Subject: block: enable multipage bvecs This patch pulls the trigger for multi-page bvecs. Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 089370eb84d9..9f77adcfde82 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -441,7 +441,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); bool __bio_try_merge_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int off); + unsigned int len, unsigned int off, bool same_page); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); -- cgit v1.2.3 From 6861428921b51113520cd47897be6c2774e4fc58 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:21 +0800 Subject: block: always define BIO_MAX_PAGES as 256 Now multi-page bvec can cover CONFIG_THP_SWAP, so we don't need to increase BIO_MAX_PAGES for it. CONFIG_THP_SWAP needs to split one THP into normal pages and adds them all to one bio. With multipage-bvec, it just takes one bvec to hold them all. Reviewed-by: Omar Sandoval Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9f77adcfde82..bdd11d4c2f05 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -34,15 +34,7 @@ #define BIO_BUG_ON #endif -#ifdef CONFIG_THP_SWAP -#if HPAGE_PMD_NR > 256 -#define BIO_MAX_PAGES HPAGE_PMD_NR -#else #define BIO_MAX_PAGES 256 -#endif -#else -#define BIO_MAX_PAGES 256 -#endif #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) -- cgit v1.2.3 From 2705c93742e91730d335838025d75d8043861174 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:23 +0800 Subject: block: kill QUEUE_FLAG_NO_SG_MERGE Since bdced438acd83ad83a6c ("block: setup bi_phys_segments after splitting"), physical segment number is mainly figured out in blk_queue_split() for fast path, and the flag of BIO_SEG_VALID is set there too. Now only blk_recount_segments() and blk_recalc_rq_segments() use this flag. Basically blk_recount_segments() is bypassed in fast path given BIO_SEG_VALID is set in blk_queue_split(). For another user of blk_recalc_rq_segments(): - run in partial completion branch of blk_update_request, which is an unusual case - run in blk_cloned_rq_check_limits(), still not a big problem if the flag is killed since dm-rq is the only user. Multi-page bvec is enabled now, not doing S/G merging is rather pointless with the current setup of the I/O path, as it isn't going to save you a significant amount of cycles. Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b6292d469ea4..faed9d9eb84c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -588,7 +588,6 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_NO_SG_MERGE 15 /* don't attempt to merge SG segments*/ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_WC 17 /* Write back caching */ #define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ -- cgit v1.2.3 From 56d18f62f556b80105e38e7975975cf7465aae3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 15 Feb 2019 19:13:24 +0800 Subject: block: kill BLK_MQ_F_SG_MERGE QUEUE_FLAG_NO_SG_MERGE has been killed, so kill BLK_MQ_F_SG_MERGE too. Reviewed-by: Christoph Hellwig Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0e030f5f76b6..b0c814bcc7e3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -218,7 +218,6 @@ struct blk_mq_ops { enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, - BLK_MQ_F_SG_MERGE = 1 << 2, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, -- cgit v1.2.3 From fadccd8fc2d06cf7fd222245d7e04b00fae946cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Feb 2019 09:37:13 +0100 Subject: nvme_ioctl.h: remove duplicate GPL boilerplate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have a ЅPDX header, so no need to duplicate the information. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- include/linux/nvme.h | 10 +--------- include/uapi/linux/nvme_ioctl.h | 9 --------- 2 files changed, 1 insertion(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index bbcc83886899..baa49e6a23cc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1,15 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Definitions for the NVM Express interface * Copyright (c) 2011-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef _LINUX_NVME_H diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h index 6e74b1eaf541..1c215ea1798e 100644 --- a/include/uapi/linux/nvme_ioctl.h +++ b/include/uapi/linux/nvme_ioctl.h @@ -2,15 +2,6 @@ /* * Definitions for the NVM Express ioctl interface * Copyright (c) 2011-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef _UAPI_LINUX_NVME_IOCTL_H -- cgit v1.2.3 From 055d045a7aaeef326f8ab6845519da3157887830 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Feb 2019 09:37:42 +0100 Subject: nvme-tcp.h: fix SPDX header For .h files we need to use /* */ style comments. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- include/linux/nvme-tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 03d87c0550a9..959e0bd9a913 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * NVMe over Fabrics TCP protocol header. * Copyright (c) 2018 Lightbits Labs. All rights reserved. -- cgit v1.2.3 From 8638b2461475ad4c35a957156ecf2425b9b82e85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Feb 2019 09:33:28 +0100 Subject: nvme-fc: convert to SPDX identifiers Update license to use SPDX-License-Identifier instead of verbose license text. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- include/linux/nvme-fc-driver.h | 10 +--------- include/linux/nvme-fc.h | 14 +------------- 2 files changed, 2 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 91745cc3704c..2bb349035431 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -1,14 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2016, Avago Technologies - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef _NVME_FC_DRIVER_H diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 36cca93a5ff2..067c9fea64fe 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2016 Avago Technologies. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful. - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, - * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A - * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO - * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. - * See the GNU General Public License for more details, a copy of which - * can be found in the file COPYING included with this package - * */ /* -- cgit v1.2.3 From 5d8762d5684ab997c7ccf2457c8beec7ef972ceb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Feb 2019 09:34:21 +0100 Subject: nvme-rdma: convert to SPDX identifiers Update license to use SPDX-License-Identifier instead of verbose license text. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- include/linux/nvme-rdma.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index a72fd04aa5e1..3aa97b98dc89 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -1,14 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef _LINUX_NVME_RDMA_H -- cgit v1.2.3 From fb7e160019f4abb4082740bfeb27a38f6389c745 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Nov 2018 16:37:38 +0100 Subject: fs: add an iopoll method to struct file_operations This new methods is used to explicitly poll for I/O completion for an iocb. It must be called for any iocb submitted asynchronously (that is with a non-null ki_complete) which has the IOCB_HIPRI flag set. The method is assisted by a new ki_cookie field in struct iocb to store the polling cookie. Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 29d8e2cfed0e..dedcc2e9265c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -310,6 +310,7 @@ struct kiocb { int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ + unsigned int ki_cookie; /* for ->iopoll */ } __randomize_layout; static inline bool is_sync_kiocb(struct kiocb *kiocb) @@ -1787,6 +1788,7 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); + int (*iopoll)(struct kiocb *kiocb, bool spin); int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); -- cgit v1.2.3 From 0bbb280d7b767e7c86a5adfc87c76a6f09ab0423 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 21 Dec 2018 09:10:46 -0700 Subject: block: add bio_set_polled() helper For the upcoming async polled IO, we can't sleep allocating requests. If we do, then we introduce a deadlock where the submitter already has async polled IO in-flight, but can't wait for them to complete since polled requests must be active found and reaped. Utilize the helper in the blockdev DIRECT_IO code. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index bdd11d4c2f05..bb6090aa165d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -826,5 +826,19 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page, #endif /* CONFIG_BLK_DEV_INTEGRITY */ +/* + * Mark a bio as polled. Note that for async polled IO, the caller must + * expect -EWOULDBLOCK if we cannot allocate a request (or other resources). + * We cannot block waiting for requests on polled IO, as those completions + * must be found by the caller. This is different than IRQ driven IO, where + * it's safe to wait for IO to complete. + */ +static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) +{ + bio->bi_opf |= REQ_HIPRI; + if (!is_sync_kiocb(kiocb)) + bio->bi_opf |= REQ_NOWAIT; +} + #endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_H */ -- cgit v1.2.3 From 81214bab582eeda068e7904d57b6a3095e8f3855 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Dec 2018 11:12:08 -0700 Subject: iomap: wire up the iopoll method Store the request queue the last bio was submitted to in the iocb private data in addition to the cookie so that we find the right block device. Also refactor the common direct I/O bio submission code into a nice little helper. Signed-off-by: Christoph Hellwig Modified to use bio_set_polled(). Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 9a4258154b25..0fefb5455bda 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -162,6 +162,7 @@ typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret, unsigned flags); ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, iomap_dio_end_io_t end_io); +int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP struct file; -- cgit v1.2.3 From 4d633062c1c0794a6b3836b7b55afba4599736e8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Feb 2019 20:40:10 +0800 Subject: block: introduce bvec_nth_page() Single-page bvec can often be seen in small BS workloads, so introduce bvec_nth_page() for avoiding to call nth_page() unnecessarily, which looks not cheap. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bvec.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 30a57b68d017..4376f683c08a 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -51,6 +51,11 @@ struct bvec_iter_all { unsigned done; }; +static inline struct page *bvec_nth_page(struct page *page, int idx) +{ + return idx == 0 ? page : nth_page(page, idx); +} + /* * various member access, note that bio_data should of course not be used * on highmem page vectors @@ -87,8 +92,8 @@ struct bvec_iter_all { PAGE_SIZE - bvec_iter_offset((bvec), (iter))) #define bvec_iter_page(bvec, iter) \ - nth_page(mp_bvec_iter_page((bvec), (iter)), \ - mp_bvec_iter_page_idx((bvec), (iter))) + bvec_nth_page(mp_bvec_iter_page((bvec), (iter)), \ + mp_bvec_iter_page_idx((bvec), (iter))) #define bvec_iter_bvec(bvec, iter) \ ((struct bio_vec) { \ @@ -171,7 +176,7 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec, unsigned total = bvec->bv_offset + bvec->bv_len; unsigned last_page = (total - 1) / PAGE_SIZE; - seg->bv_page = nth_page(bvec->bv_page, last_page); + seg->bv_page = bvec_nth_page(bvec->bv_page, last_page); /* the whole segment is inside the last page */ if (bvec->bv_offset >= last_page * PAGE_SIZE) { -- cgit v1.2.3 From 594b9a89af8e7629e95a4cd844d188361be32790 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Feb 2019 20:40:13 +0800 Subject: block: introduce mp_bvec_for_each_page() for iterating over page mp_bvec_for_each_segment() is a bit big for the iteration, so introduce a light-weight helper for iterating over pages, then 32bytes stack space can be saved. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bvec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 4376f683c08a..87e82e503a52 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -188,4 +188,9 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec, } } +#define mp_bvec_for_each_page(pg, bv, i) \ + for (i = (bv)->bv_offset / PAGE_SIZE; \ + (i <= (((bv)->bv_offset + (bv)->bv_len - 1) / PAGE_SIZE)) && \ + (pg = bvec_nth_page((bv)->bv_page, i)); i += 1) + #endif /* __LINUX_BVEC_ITER_H */ -- cgit v1.2.3 From 5b88a17cfdeba75e0092bab2c79aaf7d9e7db482 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Feb 2019 11:00:18 -0500 Subject: block: optimize bvec iteration in bvec_iter_advance There is no need to only iterate in chunks of PAGE_SIZE or less in bvec_iter_advance, given that the callers pass in the chunk length that they are operating on - either that already is less than PAGE_SIZE because they do classic page-based iteration, or it is larger because the caller operates on multi-page bvecs. This should help shaving off a few cycles of the I/O hot path. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bvec.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 87e82e503a52..f6275c4da13a 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -112,14 +112,15 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, } while (bytes) { - unsigned iter_len = bvec_iter_len(bv, *iter); - unsigned len = min(bytes, iter_len); + const struct bio_vec *cur = bv + iter->bi_idx; + unsigned len = min3(bytes, iter->bi_size, + cur->bv_len - iter->bi_bvec_done); bytes -= len; iter->bi_size -= len; iter->bi_bvec_done += len; - if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { + if (iter->bi_bvec_done == cur->bv_len) { iter->bi_bvec_done = 0; iter->bi_idx++; } -- cgit v1.2.3