summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 13:15:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 13:15:14 -0700
commitdddec01eb8e2b56267b37a6f9f0997a64b4e0b2a (patch)
treeb6d8bfbce9abd105384b9d116499afbe306b9c22 /block
parent7daf705f362e349983e92037a198b8821db198af (diff)
parent32502b8413a77b54b9e19809404109590c32dfb7 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (37 commits) splice: fix generic_file_splice_read() race with page invalidation ramfs: enable splice write drivers/block/pktcdvd.c: avoid useless memset cdrom: revert commit 22a9189 (cdrom: use kmalloced buffers instead of buffers on stack) scsi: sr avoids useless buffer allocation block: blk_rq_map_kern uses the bounce buffers for stack buffers block: add blk_queue_update_dma_pad DAC960: push down BKL pktcdvd: push BKL down into driver paride: push ioctl down into driver block: use get_unaligned_* helpers block: extend queue_flag bitops block: request_module(): use format string Add bvec_merge_data to handle stacked devices and ->merge_bvec() block: integrity flags can't use bit ops on unsigned short cmdfilter: extend default read filter sg: fix odd style (extra parenthesis) introduced by cmd filter patch block: add bounce support to blk_rq_map_user_iov cfq-iosched: get rid of enable_idle being unused warning allow userspace to modify scsi command filter on per device basis ...
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig12
-rw-r--r--block/Makefile4
-rw-r--r--block/as-iosched.c18
-rw-r--r--block/blk-core.c19
-rw-r--r--block/blk-integrity.c381
-rw-r--r--block/blk-map.c6
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/blk-settings.c24
-rw-r--r--block/blk.h8
-rw-r--r--block/blktrace.c45
-rw-r--r--block/bsg.c38
-rw-r--r--block/cfq-iosched.c83
-rw-r--r--block/cmd-filter.c334
-rw-r--r--block/elevator.c8
-rw-r--r--block/genhd.c12
-rw-r--r--block/scsi_ioctl.c121
16 files changed, 959 insertions, 157 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 3e97f2bc446f..1ab7c15c8d7a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -81,6 +81,18 @@ config BLK_DEV_BSG
If unsure, say N.
+config BLK_DEV_INTEGRITY
+ bool "Block layer data integrity support"
+ ---help---
+ Some storage devices allow extra information to be
+ stored/retrieved to help protect the data. The block layer
+ data integrity option provides hooks which can be used by
+ filesystems to ensure better data integrity.
+
+ Say yes here if you have a storage device that provides the
+ T10/SCSI Data Integrity Field or the T13/ATA External Path
+ Protection. If in doubt, say N.
+
endif # BLOCK
config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d79594..208000b0750d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+ blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+ cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
@@ -14,3 +15,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 743f33a01a07..9735acb5b4f5 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -151,6 +151,7 @@ enum arq_state {
static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
static void as_antic_stop(struct as_data *ad);
@@ -164,8 +165,19 @@ static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
elv_ioc_count_dec(ioc_count);
- if (ioc_gone && !elv_ioc_count_read(ioc_count))
- complete(ioc_gone);
+ if (ioc_gone) {
+ /*
+ * AS scheduler is exiting, grab exit lock and check
+ * the pending io context count. If it hits zero,
+ * complete ioc_gone and set it back to NULL.
+ */
+ spin_lock(&ioc_gone_lock);
+ if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+ complete(ioc_gone);
+ ioc_gone = NULL;
+ }
+ spin_unlock(&ioc_gone_lock);
+ }
}
static void as_trim(struct io_context *ioc)
@@ -1493,7 +1505,7 @@ static void __exit as_exit(void)
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
if (elv_ioc_count_read(ioc_count))
- wait_for_completion(ioc_gone);
+ wait_for_completion(&all_gone);
synchronize_rcu();
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 1905aaba49fb..dbc7f42b5d2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio->bi_size -= nbytes;
bio->bi_sector += (nbytes >> 9);
+
+ if (bio_integrity(bio))
+ bio_integrity_advance(bio, nbytes);
+
if (bio->bi_size == 0)
bio_endio(bio, error);
} else {
@@ -201,8 +205,7 @@ void blk_plug_device(struct request_queue *q)
if (blk_queue_stopped(q))
return;
- if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
- __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
}
@@ -217,10 +220,9 @@ int blk_remove_plug(struct request_queue *q)
{
WARN_ON(!irqs_disabled());
- if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+ if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
return 0;
- queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
del_timer(&q->unplug_timer);
return 1;
}
@@ -324,8 +326,7 @@ void blk_start_queue(struct request_queue *q)
* one level of recursion is ok and is much faster than kicking
* the unplug handling
*/
- if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
- queue_flag_set(QUEUE_FLAG_REENTER, q);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -390,8 +391,7 @@ void __blk_run_queue(struct request_queue *q)
* handling reinvoke the handler shortly if we already got there.
*/
if (!elv_queue_empty(q)) {
- if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
- queue_flag_set(QUEUE_FLAG_REENTER, q);
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -1381,6 +1381,9 @@ end_io:
*/
blk_partition_remap(bio);
+ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
+ goto end_io;
+
if (old_sector != -1)
blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
old_sector);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
new file mode 100644
index 000000000000..3f1a8478cc38
--- /dev/null
+++ b/block/blk-integrity.c
@@ -0,0 +1,381 @@
+/*
+ * blk-integrity.c - Block layer data integrity extensions
+ *
+ * Copyright (C) 2007, 2008 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/bio.h>
+#include <linux/scatterlist.h>
+
+#include "blk.h"
+
+static struct kmem_cache *integrity_cachep;
+
+/**
+ * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
+ * @rq: request with integrity metadata attached
+ *
+ * Description: Returns the number of elements required in a
+ * scatterlist corresponding to the integrity metadata in a request.
+ */
+int blk_rq_count_integrity_sg(struct request *rq)
+{
+ struct bio_vec *iv, *ivprv;
+ struct req_iterator iter;
+ unsigned int segments;
+
+ ivprv = NULL;
+ segments = 0;
+
+ rq_for_each_integrity_segment(iv, rq, iter) {
+
+ if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+ segments++;
+
+ ivprv = iv;
+ }
+
+ return segments;
+}
+EXPORT_SYMBOL(blk_rq_count_integrity_sg);
+
+/**
+ * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
+ * @rq: request with integrity metadata attached
+ * @sglist: target scatterlist
+ *
+ * Description: Map the integrity vectors in request into a
+ * scatterlist. The scatterlist must be big enough to hold all
+ * elements. I.e. sized using blk_rq_count_integrity_sg().
+ */
+int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
+{
+ struct bio_vec *iv, *ivprv;
+ struct req_iterator iter;
+ struct scatterlist *sg;
+ unsigned int segments;
+
+ ivprv = NULL;
+ sg = NULL;
+ segments = 0;
+
+ rq_for_each_integrity_segment(iv, rq, iter) {
+
+ if (ivprv) {
+ if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+ goto new_segment;
+
+ sg->length += iv->bv_len;
+ } else {
+new_segment:
+ if (!sg)
+ sg = sglist;
+ else {
+ sg->page_link &= ~0x02;
+ sg = sg_next(sg);
+ }
+
+ sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
+ segments++;
+ }
+
+ ivprv = iv;
+ }
+
+ if (sg)
+ sg_mark_end(sg);
+
+ return segments;
+}
+EXPORT_SYMBOL(blk_rq_map_integrity_sg);
+
+/**
+ * blk_integrity_compare - Compare integrity profile of two block devices
+ * @b1: Device to compare
+ * @b2: Device to compare
+ *
+ * Description: Meta-devices like DM and MD need to verify that all
+ * sub-devices use the same integrity format before advertising to
+ * upper layers that they can send/receive integrity metadata. This
+ * function can be used to check whether two block devices have
+ * compatible integrity formats.
+ */
+int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+{
+ struct blk_integrity *b1 = bd1->bd_disk->integrity;
+ struct blk_integrity *b2 = bd2->bd_disk->integrity;
+
+ BUG_ON(bd1->bd_disk == NULL);
+ BUG_ON(bd2->bd_disk == NULL);
+
+ if (!b1 || !b2)
+ return 0;
+
+ if (b1->sector_size != b2->sector_size) {
+ printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->sector_size, b2->sector_size);
+ return -1;
+ }
+
+ if (b1->tuple_size != b2->tuple_size) {
+ printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->tuple_size, b2->tuple_size);
+ return -1;
+ }
+
+ if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
+ printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->tag_size, b2->tag_size);
+ return -1;
+ }
+
+ if (strcmp(b1->name, b2->name)) {
+ printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
+ bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+ b1->name, b2->name);
+ return -1;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(blk_integrity_compare);
+
+struct integrity_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct blk_integrity *, char *);
+ ssize_t (*store)(struct blk_integrity *, const char *, size_t);
+};
+
+static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *page)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+ struct integrity_sysfs_entry *entry =
+ container_of(attr, struct integrity_sysfs_entry, attr);
+
+ return entry->show(bi, page);
+}
+
+static ssize_t integrity_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *page,
+ size_t count)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+ struct integrity_sysfs_entry *entry =
+ container_of(attr, struct integrity_sysfs_entry, attr);
+ ssize_t ret = 0;
+
+ if (entry->store)
+ ret = entry->store(bi, page, count);
+
+ return ret;
+}
+
+static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
+{
+ if (bi != NULL && bi->name != NULL)
+ return sprintf(page, "%s\n", bi->name);
+ else
+ return sprintf(page, "none\n");
+}
+
+static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
+{
+ if (bi != NULL)
+ return sprintf(page, "%u\n", bi->tag_size);
+ else
+ return sprintf(page, "0\n");
+}
+
+static ssize_t integrity_read_store(struct blk_integrity *bi,
+ const char *page, size_t count)
+{
+ char *p = (char *) page;
+ unsigned long val = simple_strtoul(p, &p, 10);
+
+ if (val)
+ bi->flags |= INTEGRITY_FLAG_READ;
+ else
+ bi->flags &= ~INTEGRITY_FLAG_READ;
+
+ return count;
+}
+
+static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
+{
+ return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0);
+}
+
+static ssize_t integrity_write_store(struct blk_integrity *bi,
+ const char *page, size_t count)
+{
+ char *p = (char *) page;
+ unsigned long val = simple_strtoul(p, &p, 10);
+
+ if (val)
+ bi->flags |= INTEGRITY_FLAG_WRITE;
+ else
+ bi->flags &= ~INTEGRITY_FLAG_WRITE;
+
+ return count;
+}
+
+static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
+{
+ return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0);
+}
+
+static struct integrity_sysfs_entry integrity_format_entry = {
+ .attr = { .name = "format", .mode = S_IRUGO },
+ .show = integrity_format_show,
+};
+
+static struct integrity_sysfs_entry integrity_tag_size_entry = {
+ .attr = { .name = "tag_size", .mode = S_IRUGO },
+ .show = integrity_tag_size_show,
+};
+
+static struct integrity_sysfs_entry integrity_read_entry = {
+ .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+ .show = integrity_read_show,
+ .store = integrity_read_store,
+};
+
+static struct integrity_sysfs_entry integrity_write_entry = {
+ .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+ .show = integrity_write_show,
+ .store = integrity_write_store,
+};
+
+static struct attribute *integrity_attrs[] = {
+ &integrity_format_entry.attr,
+ &integrity_tag_size_entry.attr,
+ &integrity_read_entry.attr,
+ &integrity_write_entry.attr,
+ NULL,
+};
+
+static struct sysfs_ops integrity_ops = {
+ .show = &integrity_attr_show,
+ .store = &integrity_attr_store,
+};
+
+static int __init blk_dev_integrity_init(void)
+{
+ integrity_cachep = kmem_cache_create("blkdev_integrity",
+ sizeof(struct blk_integrity),
+ 0, SLAB_PANIC, NULL);
+ return 0;
+}
+subsys_initcall(blk_dev_integrity_init);
+
+static void blk_integrity_release(struct kobject *kobj)
+{
+ struct blk_integrity *bi =
+ container_of(kobj, struct blk_integrity, kobj);
+
+ kmem_cache_free(integrity_cachep, bi);
+}
+
+static struct kobj_type integrity_ktype = {
+ .default_attrs = integrity_attrs,
+ .sysfs_ops = &integrity_ops,
+ .release = blk_integrity_release,
+};
+
+/**
+ * blk_integrity_register - Register a gendisk as being integrity-capable
+ * @disk: struct gendisk pointer to make integrity-aware
+ * @template: integrity profile
+ *
+ * Description: When a device needs to advertise itself as being able
+ * to send/receive integrity metadata it must use this function to
+ * register the capability with the block layer. The template is a
+ * blk_integrity struct with values appropriate for the underlying
+ * hardware. See Documentation/block/data-integrity.txt.
+ */
+int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
+{
+ struct blk_integrity *bi;
+
+ BUG_ON(disk == NULL);
+ BUG_ON(template == NULL);
+
+ if (disk->integrity == NULL) {
+ bi = kmem_cache_alloc(integrity_cachep,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!bi)
+ return -1;
+
+ if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
+ &disk->dev.kobj, "%s", "integrity")) {
+ kmem_cache_free(integrity_cachep, bi);
+ return -1;
+ }
+
+ kobject_uevent(&bi->kobj, KOBJ_ADD);
+
+ bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
+ bi->sector_size = disk->queue->hardsect_size;
+ disk->integrity = bi;
+ } else
+ bi = disk->integrity;
+
+ /* Use the provided profile as template */
+ bi->name = template->name;
+ bi->generate_fn = template->generate_fn;
+ bi->verify_fn = template->verify_fn;
+ bi->tuple_size = template->tuple_size;
+ bi->set_tag_fn = template->set_tag_fn;
+ bi->get_tag_fn = template->get_tag_fn;
+ bi->tag_size = template->tag_size;
+
+ return 0;
+}
+EXPORT_SYMBOL(blk_integrity_register);
+
+/**
+ * blk_integrity_unregister - Remove block integrity profile
+ * @disk: disk whose integrity profile to deallocate
+ *
+ * Description: This function frees all memory used by the block
+ * integrity profile. To be called at device teardown.
+ */
+void blk_integrity_unregister(struct gendisk *disk)
+{
+ struct blk_integrity *bi;
+
+ if (!disk || !disk->integrity)
+ return;
+
+ bi = disk->integrity;
+
+ kobject_uevent(&bi->kobj, KOBJ_REMOVE);
+ kobject_del(&bi->kobj);
+ kobject_put(&disk->dev.kobj);
+ kmem_cache_free(integrity_cachep, bi);
+}
+EXPORT_SYMBOL(blk_integrity_unregister);
diff --git a/block/blk-map.c b/block/blk-map.c
index 0b1af5a3537c..ddd96fb11a7d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -210,6 +210,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
if (!bio_flagged(bio, BIO_USER_MAPPED))
rq->cmd_flags |= REQ_COPY_USER;
+ blk_queue_bounce(q, &bio);
bio_get(bio);
blk_rq_bio_prep(q, rq, bio);
rq->buffer = rq->data = NULL;
@@ -268,6 +269,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
int reading = rq_data_dir(rq) == READ;
int do_copy = 0;
struct bio *bio;
+ unsigned long stack_mask = ~(THREAD_SIZE - 1);
if (len > (q->max_hw_sectors << 9))
return -EINVAL;
@@ -278,6 +280,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
alignment = queue_dma_alignment(q) | q->dma_pad_mask;
do_copy = ((kaddr & alignment) || (len & alignment));
+ if (!((kaddr & stack_mask) ^
+ ((unsigned long)current->stack & stack_mask)))
+ do_copy = 1;
+
if (do_copy)
bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 651136aae76e..5efc9e7a68b7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
+ if (blk_integrity_rq(req) != blk_integrity_rq(next))
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8dd86418f35d..dfc77012843f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -302,11 +302,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
* @q: the request queue for the device
* @mask: pad mask
*
- * Set pad mask. Direct IO requests are padded to the mask specified.
+ * Set dma pad mask.
*
- * Appending pad buffer to a request modifies ->data_len such that it
- * includes the pad buffer. The original requested data length can be
- * obtained using blk_rq_raw_data_len().
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
**/
void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
{
@@ -315,6 +314,23 @@ void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
EXPORT_SYMBOL(blk_queue_dma_pad);
/**
+ * blk_queue_update_dma_pad - update pad mask
+ * @q: the request queue for the device
+ * @mask: pad mask
+ *
+ * Update dma pad mask.
+ *
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
+ **/
+void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
+{
+ if (mask > q->dma_pad_mask)
+ q->dma_pad_mask = mask;
+}
+EXPORT_SYMBOL(blk_queue_update_dma_pad);
+
+/**
* blk_queue_dma_drain - Set up a drain buffer for excess dma.
* @q: the request queue for the device
* @dma_drain_needed: fn which returns non-zero if drain is necessary
diff --git a/block/blk.h b/block/blk.h
index 59776ab4742a..c79f30e1df52 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
return q->nr_congestion_off;
}
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+#define rq_for_each_integrity_segment(bvl, _rq, _iter) \
+ __rq_for_each_bio(_iter.bio, _rq) \
+ bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i)
+
+#endif /* BLK_DEV_INTEGRITY */
+
#endif
diff --git a/block/blktrace.c b/block/blktrace.c
index 8d3a27780260..eb9651ccb241 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -244,6 +244,7 @@ err:
static void blk_trace_cleanup(struct blk_trace *bt)
{
relay_close(bt->rchan);
+ debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
blk_remove_tree(bt->dir);
free_percpu(bt->sequence);
@@ -291,6 +292,44 @@ static const struct file_operations blk_dropped_fops = {
.read = blk_dropped_read,
};
+static int blk_msg_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->i_private;
+
+ return 0;
+}
+
+static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char *msg;
+ struct blk_trace *bt;
+
+ if (count > BLK_TN_MAX_MSG)
+ return -EINVAL;
+
+ msg = kmalloc(count, GFP_KERNEL);
+ if (msg == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(msg, buffer, count)) {
+ kfree(msg);
+ return -EFAULT;
+ }
+
+ bt = filp->private_data;
+ __trace_note_message(bt, "%s", msg);
+ kfree(msg);
+
+ return count;
+}
+
+static const struct file_operations blk_msg_fops = {
+ .owner = THIS_MODULE,
+ .open = blk_msg_open,
+ .write = blk_msg_write,
+};
+
/*
* Keep track of how many times we encountered a full subbuffer, to aid
* the user space app in telling how many lost events there were.
@@ -380,6 +419,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->dropped_file)
goto err;
+ bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
+ if (!bt->msg_file)
+ goto err;
+
bt->rchan = relay_open("trace", dir, buts->buf_size,
buts->buf_nr, &blk_relay_callbacks, bt);
if (!bt->rchan)
@@ -409,6 +452,8 @@ err:
if (dir)
blk_remove_tree(dir);
if (bt) {
+ if (bt->msg_file)
+ debugfs_remove(bt->msg_file);
if (bt->dropped_file)
debugfs_remove(bt->dropped_file);
free_percpu(bt->sequence);
diff --git a/block/bsg.c b/block/bsg.c
index 54d617f7df3e..93e757d7174b 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -44,11 +44,12 @@ struct bsg_device {
char name[BUS_ID_SIZE];
int max_queue;
unsigned long flags;
+ struct blk_scsi_cmd_filter *cmd_filter;
+ mode_t *f_mode;
};
enum {
BSG_F_BLOCK = 1,
- BSG_F_WRITE_PERM = 2,
};
#define BSG_DEFAULT_CMDS 64
@@ -172,7 +173,7 @@ unlock:
}
static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
- struct sg_io_v4 *hdr, int has_write_perm)
+ struct sg_io_v4 *hdr, struct bsg_device *bd)
{
if (hdr->request_len > BLK_MAX_CDB) {
rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,8 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
return -EFAULT;
if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
- if (blk_verify_command(rq->cmd, has_write_perm))
+ if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
+ bd->f_mode))
return -EPERM;
} else if (!capable(CAP_SYS_RAWIO))
return -EPERM;
@@ -263,8 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
if (!rq)
return ERR_PTR(-ENOMEM);
- ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
- &bd->flags));
+ ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
if (ret)
goto out;
@@ -566,12 +567,23 @@ static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
set_bit(BSG_F_BLOCK, &bd->flags);
}
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+ struct file *file)
{
- if (file->f_mode & FMODE_WRITE)
- set_bit(BSG_F_WRITE_PERM, &bd->flags);
- else
- clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+ struct inode *inode;
+ struct gendisk *disk;
+
+ if (!file)
+ return;
+
+ inode = file->f_dentry->d_inode;
+ if (!inode)
+ return;
+
+ disk = inode->i_bdev->bd_disk;
+
+ bd->cmd_filter = &disk->cmd_filter;
+ bd->f_mode = &file->f_mode;
}
/*
@@ -595,6 +607,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: read %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
+
bytes_read = 0;
ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
*ppos = bytes_read;
@@ -668,7 +682,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
dprintk("%s: write %Zd bytes\n", bd->name, count);
bsg_set_block(bd, file);
- bsg_set_write_perm(bd, file);
+ bsg_set_cmd_filter(bd, file);
bytes_written = 0;
ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -772,7 +786,9 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
}
bd->queue = rq;
+
bsg_set_block(bd, file);
+ bsg_set_cmd_filter(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d01b411c72f0..1e2aff812ee2 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -11,6 +11,7 @@
#include <linux/elevator.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
+#include <linux/blktrace_api.h>
/*
* tunables
@@ -41,13 +42,14 @@ static int cfq_slice_idle = HZ / 125;
#define RQ_CIC(rq) \
((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq) ((rq)->elevator_private2)
+#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;
static DEFINE_PER_CPU(unsigned long, ioc_count);
static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
#define CFQ_PRIO_LISTS IOPRIO_BE_NR
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -155,6 +157,7 @@ struct cfq_queue {
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
+ pid_t pid;
};
enum cfqq_state_flags {
@@ -198,6 +201,11 @@ CFQ_CFQQ_FNS(slice_new);
CFQ_CFQQ_FNS(sync);
#undef CFQ_CFQQ_FNS
+#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
+ blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
+#define cfq_log(cfqd, fmt, args...) \
+ blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
+
static void cfq_dispatch_insert(struct request_queue *, struct request *);
static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
struct io_context *, gfp_t);
@@ -234,8 +242,10 @@ static inline int cfq_bio_sync(struct bio *bio)
*/
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
- if (cfqd->busy_queues)
+ if (cfqd->busy_queues) {
+ cfq_log(cfqd, "schedule dispatch");
kblockd_schedule_work(&cfqd->unplug_work);
+ }
}
static int cfq_queue_empty(struct request_queue *q)
@@ -270,6 +280,7 @@ static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
}
/*
@@ -539,6 +550,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
BUG_ON(cfq_cfqq_on_rr(cfqq));
cfq_mark_cfqq_on_rr(cfqq);
cfqd->busy_queues++;
@@ -552,6 +564,7 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
+ cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
BUG_ON(!cfq_cfqq_on_rr(cfqq));
cfq_clear_cfqq_on_rr(cfqq);
@@ -638,6 +651,8 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
struct cfq_data *cfqd = q->elevator->elevator_data;
cfqd->rq_in_driver++;
+ cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
+ cfqd->rq_in_driver);
/*
* If the depth is larger 1, it really could be queueing. But lets
@@ -657,6 +672,8 @@ static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
WARN_ON(!cfqd->rq_in_driver);
cfqd->rq_in_driver--;
+ cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
+ cfqd->rq_in_driver);
}
static void cfq_remove_request(struct request *rq)
@@ -746,6 +763,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
if (cfqq) {
+ cfq_log_cfqq(cfqd, cfqq, "set_active");
cfqq->slice_end = 0;
cfq_clear_cfqq_must_alloc_slice(cfqq);
cfq_clear_cfqq_fifo_expire(cfqq);
@@ -763,6 +781,8 @@ static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
int timed_out)
{
+ cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
+
if (cfq_cfqq_wait_request(cfqq))
del_timer(&cfqd->idle_slice_timer);
@@ -772,8 +792,10 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
/*
* store what was left of this slice, if the queue idled/timed out
*/
- if (timed_out && !cfq_cfqq_slice_new(cfqq))
+ if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
cfqq->slice_resid = cfqq->slice_end - jiffies;
+ cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
+ }
cfq_resort_rr_list(cfqd, cfqq);
@@ -866,6 +888,12 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
return;
/*
+ * still requests with the driver, don't idle
+ */
+ if (cfqd->rq_in_driver)
+ return;
+
+ /*
* task has exited, don't wait
*/
cic = cfqd->activ