From aadbe266f2f89ccc68b52f4effc7b3a8b29521ef Mon Sep 17 00:00:00 2001 From: Andrei Warkentin Date: Wed, 28 Mar 2012 18:41:22 +0100 Subject: dm exception store: fix init error path Call the correct exit function on failure in dm_exception_store_init. Signed-off-by: Andrei Warkentin Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-exception-store.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 042e71996569..aa70f7d43a1a 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -283,7 +283,7 @@ int dm_exception_store_init(void) return 0; persistent_fail: - dm_persistent_snapshot_exit(); + dm_transient_snapshot_exit(); transient_fail: return r; } -- cgit v1.2.3 From aeb2deae2660a1773c83d3c6e9e6575daa3855d6 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 28 Mar 2012 18:41:22 +0100 Subject: dm crypt: fix mempool deadlock This patch fixes a possible deadlock in dm-crypt's mempool use. Currently, dm-crypt reserves a mempool of MIN_BIO_PAGES reserved pages. It allocates first MIN_BIO_PAGES with non-failing allocation (the allocation cannot fail and waits until the mempool is refilled). Further pages are allocated with different gfp flags that allow failing. Because allocations may be done in parallel, this code can deadlock. Example: There are two processes, each tries to allocate MIN_BIO_PAGES and the processes run simultaneously. It may end up in a situation where each process allocates (MIN_BIO_PAGES / 2) pages. The mempool is exhausted. Each process waits for more pages to be freed to the mempool, which never happens. To avoid this deadlock scenario, this patch changes the code so that only the first page is allocated with non-failing gfp mask. Allocation of further pages may fail. Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index db6b51639cee..6c83ad901770 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -176,7 +176,6 @@ struct crypt_config { #define MIN_IOS 16 #define MIN_POOL_PAGES 32 -#define MIN_BIO_PAGES 8 static struct kmem_cache *_crypt_io_pool; @@ -848,12 +847,11 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, } /* - * if additional pages cannot be allocated without waiting, - * return a partially allocated bio, the caller will then try - * to allocate additional bios while submitting this partial bio + * If additional pages cannot be allocated without waiting, + * return a partially-allocated bio. The caller will then try + * to allocate more bios while submitting this partial bio. */ - if (i == (MIN_BIO_PAGES - 1)) - gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; + gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; len = (size > PAGE_SIZE) ? PAGE_SIZE : size; -- cgit v1.2.3 From 72c6e7afc43e19f68a31dea204fc366624d6eee9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 28 Mar 2012 18:41:22 +0100 Subject: dm crypt: add missing error handling Always set io->error to -EIO when an error is detected in dm-crypt. There were cases where an error code would be set only if we finish processing the last sector. If there were other encryption operations in flight, the error would be ignored and bio would be returned with success as if no error happened. This bug is present in kcryptd_crypt_write_convert, kcryptd_crypt_read_convert and kcryptd_async_done. Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Reviewed-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 6c83ad901770..87de0d6b49f2 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1044,16 +1044,14 @@ static void kcryptd_queue_io(struct dm_crypt_io *io) queue_work(cc->io_queue, &io->work); } -static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, - int error, int async) +static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) { struct bio *clone = io->ctx.bio_out; struct crypt_config *cc = io->target->private; - if (unlikely(error < 0)) { + if (unlikely(io->error < 0)) { crypt_free_buffer_pages(cc, clone); bio_put(clone); - io->error = -EIO; crypt_dec_pending(io); return; } @@ -1104,12 +1102,16 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) sector += bio_sectors(clone); crypt_inc_pending(io); + r = crypt_convert(cc, &io->ctx); + if (r < 0) + io->error = -EIO; + crypt_finished = atomic_dec_and_test(&io->ctx.pending); /* Encryption was already finished, submit io now */ if (crypt_finished) { - kcryptd_crypt_write_io_submit(io, r, 0); + kcryptd_crypt_write_io_submit(io, 0); /* * If there was an error, do not try next fragments. @@ -1160,11 +1162,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_dec_pending(io); } -static void kcryptd_crypt_read_done(struct dm_crypt_io *io, int error) +static void kcryptd_crypt_read_done(struct dm_crypt_io *io) { - if (unlikely(error < 0)) - io->error = -EIO; - crypt_dec_pending(io); } @@ -1179,9 +1178,11 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) io->sector); r = crypt_convert(cc, &io->ctx); + if (r < 0) + io->error = -EIO; if (atomic_dec_and_test(&io->ctx.pending)) - kcryptd_crypt_read_done(io, r); + kcryptd_crypt_read_done(io); crypt_dec_pending(io); } @@ -1202,15 +1203,18 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post) error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq); + if (error < 0) + io->error = -EIO; + mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); if (!atomic_dec_and_test(&ctx->pending)) return; if (bio_data_dir(io->base_bio) == READ) - kcryptd_crypt_read_done(io, error); + kcryptd_crypt_read_done(io); else - kcryptd_crypt_write_io_submit(io, error, 1); + kcryptd_crypt_write_io_submit(io, 1); } static void kcryptd_crypt(struct work_struct *work) -- cgit v1.2.3 From 6f94a4c45a6f744383f9f695dde019998db3df55 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Mar 2012 18:41:23 +0100 Subject: dm thin: fix stacked bi_next usage Avoid using the bi_next field for the holder of a cell when deferring bios because a stacked device below might change it. Store the holder in a new field in struct cell instead. When a cell is created, the bio that triggered creation (the holder) was added to the same bio list as subsequent bios. In some cases we pass this holder bio directly to devices underneath. If those devices use the bi_next field there will be trouble... This also simplifies some code that had to work out which bio was the holder. Signed-off-by: Joe Thornber Cc: stable@kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 124 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 51 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index c3087575fef0..da2f0217df66 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -124,7 +124,7 @@ struct cell { struct hlist_node list; struct bio_prison *prison; struct cell_key key; - unsigned count; + struct bio *holder; struct bio_list bios; }; @@ -220,55 +220,60 @@ static struct cell *__search_bucket(struct hlist_head *bucket, * This may block if a new cell needs allocating. You must ensure that * cells will be unlocked even if the calling thread is blocked. * - * Returns the number of entries in the cell prior to the new addition - * or < 0 on failure. + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. */ static int bio_detain(struct bio_prison *prison, struct cell_key *key, struct bio *inmate, struct cell **ref) { - int r; + int r = 1; unsigned long flags; uint32_t hash = hash_key(prison, key); - struct cell *uninitialized_var(cell), *cell2 = NULL; + struct cell *cell, *cell2; BUG_ON(hash > prison->nr_buckets); spin_lock_irqsave(&prison->lock, flags); + cell = __search_bucket(prison->cells + hash, key); + if (cell) { + bio_list_add(&cell->bios, inmate); + goto out; + } - if (!cell) { - /* - * Allocate a new cell - */ - spin_unlock_irqrestore(&prison->lock, flags); - cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); - spin_lock_irqsave(&prison->lock, flags); + /* + * Allocate a new cell + */ + spin_unlock_irqrestore(&prison->lock, flags); + cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); + spin_lock_irqsave(&prison->lock, flags); - /* - * We've been unlocked, so we have to double check that - * nobody else has inserted this cell in the meantime. - */ - cell = __search_bucket(prison->cells + hash, key); + /* + * We've been unlocked, so we have to double check that + * nobody else has inserted this cell in the meantime. + */ + cell = __search_bucket(prison->cells + hash, key); + if (cell) { + mempool_free(cell2, prison->cell_pool); + bio_list_add(&cell->bios, inmate); + goto out; + } + + /* + * Use new cell. + */ + cell = cell2; - if (!cell) { - cell = cell2; - cell2 = NULL; + cell->prison = prison; + memcpy(&cell->key, key, sizeof(cell->key)); + cell->holder = inmate; + bio_list_init(&cell->bios); + hlist_add_head(&cell->list, prison->cells + hash); - cell->prison = prison; - memcpy(&cell->key, key, sizeof(cell->key)); - cell->count = 0; - bio_list_init(&cell->bios); - hlist_add_head(&cell->list, prison->cells + hash); - } - } + r = 0; - r = cell->count++; - bio_list_add(&cell->bios, inmate); +out: spin_unlock_irqrestore(&prison->lock, flags); - if (cell2) - mempool_free(cell2, prison->cell_pool); - *ref = cell; return r; @@ -283,8 +288,8 @@ static void __cell_release(struct cell *cell, struct bio_list *inmates) hlist_del(&cell->list); - if (inmates) - bio_list_merge(inmates, &cell->bios); + bio_list_add(inmates, cell->holder); + bio_list_merge(inmates, &cell->bios); mempool_free(cell, prison->cell_pool); } @@ -305,22 +310,44 @@ static void cell_release(struct cell *cell, struct bio_list *bios) * bio may be in the cell. This function releases the cell, and also does * a sanity check. */ +static void __cell_release_singleton(struct cell *cell, struct bio *bio) +{ + hlist_del(&cell->list); + BUG_ON(cell->holder != bio); + BUG_ON(!bio_list_empty(&cell->bios)); +} + static void cell_release_singleton(struct cell *cell, struct bio *bio) { - struct bio_prison *prison = cell->prison; - struct bio_list bios; - struct bio *b; unsigned long flags; - - bio_list_init(&bios); + struct bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, &bios); + __cell_release_singleton(cell, bio); spin_unlock_irqrestore(&prison->lock, flags); +} + +/* + * Sometimes we don't want the holder, just the additional bios. + */ +static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates) +{ + struct bio_prison *prison = cell->prison; - b = bio_list_pop(&bios); - BUG_ON(b != bio); - BUG_ON(!bio_list_empty(&bios)); + hlist_del(&cell->list); + bio_list_merge(inmates, &cell->bios); + + mempool_free(cell, prison->cell_pool); +} + +static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates) +{ + unsigned long flags; + struct bio_prison *prison = cell->prison; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release_no_holder(cell, inmates); + spin_unlock_irqrestore(&prison->lock, flags); } static void cell_error(struct cell *cell) @@ -800,21 +827,16 @@ static void cell_defer(struct thin_c *tc, struct cell *cell, * Same as cell_defer above, except it omits one particular detainee, * a write bio that covers the block and has already been processed. */ -static void cell_defer_except(struct thin_c *tc, struct cell *cell, - struct bio *exception) +static void cell_defer_except(struct thin_c *tc, struct cell *cell) { struct bio_list bios; - struct bio *bio; struct pool *pool = tc->pool; unsigned long flags; bio_list_init(&bios); - cell_release(cell, &bios); spin_lock_irqsave(&pool->lock, flags); - while ((bio = bio_list_pop(&bios))) - if (bio != exception) - bio_list_add(&pool->deferred_bios, bio); + cell_release_no_holder(cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); wake_worker(pool); @@ -854,7 +876,7 @@ static void process_prepared_mapping(struct new_mapping *m) * the bios in the cell. */ if (bio) { - cell_defer_except(tc, m->cell, bio); + cell_defer_except(tc, m->cell); bio_endio(bio, 0); } else cell_defer(tc, m->cell, m->data_block); -- cgit v1.2.3 From b0988900bae9ecf968a8a8d086a9eec671a9517a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Mar 2012 18:41:23 +0100 Subject: dm persistent data: fix btree rebalancing after remove When we remove an entry from a node we sometimes rebalance with it's two neighbours. This wasn't being done correctly; in some cases entries have to move all the way from the right neighbour to the left neighbour, or vice versa. This patch pretty much re-writes the balancing code to fix it. This code is barely used currently; only when you delete a thin device, and then only if you have hundreds of them in the same pool. Once we have discard support, which removes mappings, this will be used much more heavily. Signed-off-by: Joe Thornber Cc: stable@kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-btree-remove.c | 174 +++++++++++++++------------ 1 file changed, 99 insertions(+), 75 deletions(-) (limited to 'drivers') diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 023fbc2d389e..1a35caf2563d 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -128,18 +128,9 @@ static void delete_at(struct node *n, unsigned index) n->header.nr_entries = cpu_to_le32(nr_entries - 1); } -static unsigned del_threshold(struct node *n) -{ - return le32_to_cpu(n->header.max_entries) / 3; -} - static unsigned merge_threshold(struct node *n) { - /* - * The extra one is because we know we're potentially going to - * delete an entry. - */ - return 2 * (le32_to_cpu(n->header.max_entries) / 3) + 1; + return le32_to_cpu(n->header.max_entries) / 3; } struct child { @@ -188,6 +179,15 @@ static int exit_child(struct dm_btree_info *info, struct child *c) static void shift(struct node *left, struct node *right, int count) { + uint32_t nr_left = le32_to_cpu(left->header.nr_entries); + uint32_t nr_right = le32_to_cpu(right->header.nr_entries); + uint32_t max_entries = le32_to_cpu(left->header.max_entries); + uint32_t r_max_entries = le32_to_cpu(right->header.max_entries); + + BUG_ON(max_entries != r_max_entries); + BUG_ON(nr_left - count > max_entries); + BUG_ON(nr_right + count > max_entries); + if (!count) return; @@ -199,13 +199,8 @@ static void shift(struct node *left, struct node *right, int count) node_shift(right, count); } - left->header.nr_entries = - cpu_to_le32(le32_to_cpu(left->header.nr_entries) - count); - BUG_ON(le32_to_cpu(left->header.nr_entries) > le32_to_cpu(left->header.max_entries)); - - right->header.nr_entries = - cpu_to_le32(le32_to_cpu(right->header.nr_entries) + count); - BUG_ON(le32_to_cpu(right->header.nr_entries) > le32_to_cpu(right->header.max_entries)); + left->header.nr_entries = cpu_to_le32(nr_left - count); + right->header.nr_entries = cpu_to_le32(nr_right + count); } static void __rebalance2(struct dm_btree_info *info, struct node *parent, @@ -215,8 +210,9 @@ static void __rebalance2(struct dm_btree_info *info, struct node *parent, struct node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); + unsigned threshold = 2 * merge_threshold(left) + 1; - if (nr_left + nr_right <= merge_threshold(left)) { + if (nr_left + nr_right < threshold) { /* * Merge */ @@ -234,9 +230,6 @@ static void __rebalance2(struct dm_btree_info *info, struct node *parent, * Rebalance. */ unsigned target_left = (nr_left + nr_right) / 2; - unsigned shift_ = nr_left - target_left; - BUG_ON(le32_to_cpu(left->header.max_entries) <= nr_left - shift_); - BUG_ON(le32_to_cpu(right->header.max_entries) <= nr_right + shift_); shift(left, right, nr_left - target_left); *key_ptr(parent, r->index) = right->keys[0]; } @@ -272,6 +265,84 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, return exit_child(info, &right); } +/* + * We dump as many entries from center as possible into left, then the rest + * in right, then rebalance2. This wastes some cpu, but I want something + * simple atm. + */ +static void delete_center_node(struct dm_btree_info *info, struct node *parent, + struct child *l, struct child *c, struct child *r, + struct node *left, struct node *center, struct node *right, + uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) +{ + uint32_t max_entries = le32_to_cpu(left->header.max_entries); + unsigned shift = min(max_entries - nr_left, nr_center); + + BUG_ON(nr_left + shift > max_entries); + node_copy(left, center, -shift); + left->header.nr_entries = cpu_to_le32(nr_left + shift); + + if (shift != nr_center) { + shift = nr_center - shift; + BUG_ON((nr_right + shift) > max_entries); + node_shift(right, shift); + node_copy(center, right, shift); + right->header.nr_entries = cpu_to_le32(nr_right + shift); + } + *key_ptr(parent, r->index) = right->keys[0]; + + delete_at(parent, c->index); + r->index--; + + dm_tm_dec(info->tm, dm_block_location(c->block)); + __rebalance2(info, parent, l, r); +} + +/* + * Redistributes entries among 3 sibling nodes. + */ +static void redistribute3(struct dm_btree_info *info, struct node *parent, + struct child *l, struct child *c, struct child *r, + struct node *left, struct node *center, struct node *right, + uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) +{ + int s; + uint32_t max_entries = le32_to_cpu(left->header.max_entries); + unsigned target = (nr_left + nr_center + nr_right) / 3; + BUG_ON(target > max_entries); + + if (nr_left < nr_right) { + s = nr_left - target; + + if (s < 0 && nr_center < -s) { + /* not enough in central node */ + shift(left, center, nr_center); + s = nr_center - target; + shift(left, right, s); + nr_right += s; + } else + shift(left, center, s); + + shift(center, right, target - nr_right); + + } else { + s = target - nr_right; + if (s > 0 && nr_center < s) { + /* not enough in central node */ + shift(center, right, nr_center); + s = target - nr_center; + shift(left, right, s); + nr_left -= s; + } else + shift(center, right, s); + + shift(left, center, nr_left - target); + } + + *key_ptr(parent, c->index) = center->keys[0]; + *key_ptr(parent, r->index) = right->keys[0]; +} + static void __rebalance3(struct dm_btree_info *info, struct node *parent, struct child *l, struct child *c, struct child *r) { @@ -282,62 +353,18 @@ static void __rebalance3(struct dm_btree_info *info, struct node *parent, uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_center = le32_to_cpu(center->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); - uint32_t max_entries = le32_to_cpu(left->header.max_entries); - unsigned target; + unsigned threshold = merge_threshold(left) * 4 + 1; BUG_ON(left->header.max_entries != center->header.max_entries); BUG_ON(center->header.max_entries != right->header.max_entries); - if (((nr_left + nr_center + nr_right) / 2) < merge_threshold(center)) { - /* - * Delete center node: - * - * We dump as many entries from center as possible into - * left, then the rest in right, then rebalance2. This - * wastes some cpu, but I want something simple atm. - */ - unsigned shift = min(max_entries - nr_left, nr_center); - - BUG_ON(nr_left + shift > max_entries); - node_copy(left, center, -shift); - left->header.nr_entries = cpu_to_le32(nr_left + shift); - - if (shift != nr_center) { - shift = nr_center - shift; - BUG_ON((nr_right + shift) >= max_entries); - node_shift(right, shift); - node_copy(center, right, shift); - right->header.nr_entries = cpu_to_le32(nr_right + shift); - } - *key_ptr(parent, r->index) = right->keys[0]; - - delete_at(parent, c->index); - r->index--; - - dm_tm_dec(info->tm, dm_block_location(c->block)); - __rebalance2(info, parent, l, r); - - return; - } - - /* - * Rebalance - */ - target = (nr_left + nr_center + nr_right) / 3; - BUG_ON(target > max_entries); - - /* - * Adjust the left node - */ - shift(left, center, nr_left - target); - - /* - * Adjust the right node - */ - shift(center, right, target - nr_right); - *key_ptr(parent, c->index) = center->keys[0]; - *key_ptr(parent, r->index) = right->keys[0]; + if ((nr_left + nr_center + nr_right) < threshold) + delete_center_node(info, parent, l, c, r, left, center, right, + nr_left, nr_center, nr_right); + else + redistribute3(info, parent, l, c, r, left, center, right, + nr_left, nr_center, nr_right); } static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, @@ -441,9 +468,6 @@ static int rebalance_children(struct shadow_spine *s, if (r) return r; - if (child_entries > del_threshold(n)) - return 0; - has_left_sibling = i > 0; has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); -- cgit v1.2.3 From e0b215da8fde99b6a9d82ee4c3600ec223cc7959 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Wed, 28 Mar 2012 18:41:24 +0100 Subject: dm uevent: no longer experimental Drop EXPERIMENTAL tag from dm-uevent. It's not changed for a while and some userspace tools are relying upon it. Signed-off-by: Alasdair G Kergon --- drivers/md/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index faa4741df6d3..5cbb481435f7 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -359,8 +359,8 @@ config DM_DELAY If unsure, say N. config DM_UEVENT - bool "DM uevents (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + bool "DM uevents" + depends on BLK_DEV_DM ---help--- Generate udev events for DM events. -- cgit v1.2.3 From 035220b33d6865d81d5433600def53373cca7127 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Wed, 28 Mar 2012 18:41:24 +0100 Subject: dm raid: no longer experimental The dm raid module (using md) is becoming the preferred way of creating long-lived mirrors through userspace LVM so remove the EXPERIMENTAL tag. Signed-off-by: Alasdair G Kergon --- drivers/md/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 5cbb481435f7..71000078351a 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -277,8 +277,8 @@ config DM_MIRROR needed for live data migration tools such as 'pvmove'. config DM_RAID - tristate "RAID 1/4/5/6 target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "RAID 1/4/5/6 target" + depends on BLK_DEV_DM select MD_RAID1 select MD_RAID456 select BLK_DEV_MD -- cgit v1.2.3 From fe878f34df89ad4af758f40bbec829807dc93a00 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Mar 2012 18:41:24 +0100 Subject: dm thin: correct comments Remove documentation for unimplemented 'trim' message. I'd planned a 'trim' target message for shrinking thin devices, but this is better handled via the discard ioctl. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index da2f0217df66..1791134cf477 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -72,7 +72,7 @@ * missed out if the io covers the block. (schedule_copy). * * iv) insert the new mapping into the origin's btree - * (process_prepared_mappings). This act of inserting breaks some + * (process_prepared_mapping). This act of inserting breaks some * sharing of btree nodes between the two devices. Breaking sharing only * effects the btree of that specific device. Btrees for the other * devices that share the block never change. The btree for the origin -- cgit v1.2.3 From 574ce07eb0014069f1da763c219bb30ea4c266ec Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 28 Mar 2012 18:41:24 +0100 Subject: dm table: simplify call to free_devices free_devices in dm_table.c already uses list_for_each(), so we don't need to check if the list is empty. Signed-off-by: Hannes Reinecke Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 63cc54289aff..a3d1e18317f4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -268,8 +268,7 @@ void dm_table_destroy(struct dm_table *t) vfree(t->highs); /* free the device list */ - if (t->devices.next != &t->devices) - free_devices(&t->devices); + free_devices(&t->devices); dm_free_md_mempools(t->mempools); -- cgit v1.2.3 From 4d7b38b7d944a79da3793b6c92d38682f3905ac9 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 28 Mar 2012 18:41:25 +0100 Subject: dm: clear bi_end_io on remapping failure As a precaution, set bi_end_io to NULL when failing to remap. Signed-off-by: Hannes Reinecke Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b89c548ec3f8..e24143cc2040 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1016,6 +1016,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* * Store bio_set for cleanup. */ + clone->bi_end_io = NULL; clone->bi_private = md->bs; bio_put(clone); free_tio(md, tio); -- cgit v1.2.3 From 466891f9959b500e037836737c064a72f2bbe8cf Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Wed, 28 Mar 2012 18:41:25 +0100 Subject: dm mpath: detect invalid map_context The map_context pointer should always be set. However, we have reports that upon requeuing it is not set correctly. So add set and clear functions with a BUG_ON() to track the issue properly. Signed-off-by: Jun'ichi Nomura Cc: Mike Snitzer Acked-by: Hannes Reinecke Tested-by: Heiko Carstens Acked-by: Dave Wysochanski Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 801d92d237cf..e92a0005eeff 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -226,6 +226,27 @@ static void free_multipath(struct multipath *m) kfree(m); } +static int set_mapinfo(struct multipath *m, union map_info *info) +{ + struct dm_mpath_io *mpio; + + mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC); + if (!mpio) + return -ENOMEM; + + memset(mpio, 0, sizeof(*mpio)); + info->ptr = mpio; + + return 0; +} + +static void clear_mapinfo(struct multipath *m, union map_info *info) +{ + struct dm_mpath_io *mpio = info->ptr; + + info->ptr = NULL; + mempool_free(mpio, m->mpio_pool); +} /*----------------------------------------------- * Path selection @@ -341,13 +362,14 @@ static int __must_push_back(struct multipath *m) } static int map_io(struct multipath *m, struct request *clone, - struct dm_mpath_io *mpio, unsigned was_queued) + union map_info *map_context, unsigned was_queued) { int r = DM_MAPIO_REMAPPED; size_t nr_bytes = blk_rq_bytes(clone); unsigned long flags; struct pgpath *pgpath; struct block_device *bdev; + struct dm_mpath_io *mpio = map_context->ptr; spin_lock_irqsave(&m->lock, flags); @@ -423,7 +445,6 @@ static void dispatch_queued_ios(struct multipath *m) { int r; unsigned long flags; - struct dm_mpath_io *mpio; union map_info *info; struct request *clone, *n; LIST_HEAD(cl); @@ -436,16 +457,15 @@ static void dispatch_queued_ios(struct multipath *m) list_del_init(&clone->queuelist); info = dm_get_rq_mapinfo(clone); - mpio = info->ptr; - r = map_io(m, clone, mpio, 1); + r = map_io(m, clone, info, 1); if (r < 0) { - mempool_free(mpio, m->mpio_pool); + clear_mapinfo(m, info); dm_kill_unmapped_request(clone, r); } else if (r == DM_MAPIO_REMAPPED) dm_dispatch_request(clone); else if (r == DM_MAPIO_REQUEUE) { - mempool_free(mpio, m->mpio_pool); + clear_mapinfo(m, info); dm_requeue_unmapped_request(clone); } } @@ -908,20 +928,16 @@ static int multipath_map(struct dm_target *ti, struct request *clone, union map_info *map_context) { int r; - struct dm_mpath_io *mpio; struct multipath *m = (struct multipath *) ti->private; - mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC); - if (!mpio) + if (set_mapinfo(m, map_context) < 0) /* ENOMEM, requeue */ return DM_MAPIO_REQUEUE; - memset(mpio, 0, sizeof(*mpio)); - map_context->ptr = mpio; clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - r = map_io(m, clone, mpio, 0); + r = map_io(m, clone, map_context, 0); if (r < 0 || r == DM_MAPIO_REQUEUE) - mempool_free(mpio, m->mpio_pool); + clear_mapinfo(m, map_context); return r; } @@ -1261,13 +1277,15 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, struct path_selector *ps; int r; + BUG_ON(!mpio); + r = do_end_io(m, clone, error, mpio); if (pgpath) { ps = &pgpath->pg->ps; if (ps->type->end_io) ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); } - mempool_free(mpio, m->mpio_pool); + clear_mapinfo(m, map_context); return r; } -- cgit v1.2.3 From a3aefb395e4f321c8b1314c88f1123624adcf743 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Mar 2012 18:41:25 +0100 Subject: dm persistent data: remove redundant value_size arg from value_ptr Now that the value_size is held within every node of the btrees we can remove this argument from value_ptr(). For the last few months a BUG_ON has been checking this argument is the same as that held in the node. No issues were reported. So this is a safe change. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-btree-internal.h | 7 ++----- drivers/md/persistent-data/dm-btree-remove.c | 28 +++++++++++++------------- drivers/md/persistent-data/dm-btree.c | 27 ++++++++++++------------- 3 files changed, 29 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index d279c768f8f1..5709bfeab1e8 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -108,12 +108,9 @@ static inline void *value_base(struct node *n) return &n->keys[le32_to_cpu(n->header.max_entries)]; } -/* - * FIXME: Now that value size is stored in node we don't need the third parm. - */ -static inline void *value_ptr(struct node *n, uint32_t index, size_t value_size) +static inline void *value_ptr(struct node *n, uint32_t index) { - BUG_ON(value_size != le32_to_cpu(n->header.value_size)); + uint32_t value_size = le32_to_cpu(n->header.value_size); return value_base(n) + (value_size * index); } diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 1a35caf2563d..aa71e2359a07 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -61,20 +61,20 @@ static void node_shift(struct node *n, int shift) if (shift < 0) { shift = -shift; BUG_ON(shift > nr_entries); - BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift, value_size)); + BUG_ON((void *) key_ptr(n, shift) >= value_ptr(n, shift)); memmove(key_ptr(n, 0), key_ptr(n, shift), (nr_entries - shift) * sizeof(__le64)); - memmove(value_ptr(n, 0, value_size), - value_ptr(n, shift, value_size), + memmove(value_ptr(n, 0), + value_ptr(n, shift), (nr_entries - shift) * value_size); } else { BUG_ON(nr_entries + shift > le32_to_cpu(n->header.max_entries)); memmove(key_ptr(n, shift), key_ptr(n, 0), nr_entries * sizeof(__le64)); - memmove(value_ptr(n, shift, value_size), - value_ptr(n, 0, value_size), + memmove(value_ptr(n, shift), + value_ptr(n, 0), nr_entries * value_size); } } @@ -91,16 +91,16 @@ static void node_copy(struct node *left, struct node *right, int shift) memcpy(key_ptr(left, nr_left), key_ptr(right, 0), shift * sizeof(__le64)); - memcpy(value_ptr(left, nr_left, value_size), - value_ptr(right, 0, value_size), + memcpy(value_ptr(left, nr_left), + value_ptr(right, 0), shift * value_size); } else { BUG_ON(shift > le32_to_cpu(right->header.max_entries)); memcpy(key_ptr(right, 0), key_ptr(left, nr_left - shift), shift * sizeof(__le64)); - memcpy(value_ptr(right, 0, value_size), - value_ptr(left, nr_left - shift, value_size), + memcpy(value_ptr(right, 0), + value_ptr(left, nr_left - shift), shift * value_size); } } @@ -120,8 +120,8 @@ static void delete_at(struct node *n, unsigned index) key_ptr(n, index + 1), nr_to_copy * sizeof(__le64)); - memmove(value_ptr(n, index, value_size), - value_ptr(n, index + 1, value_size), + memmove(value_ptr(n, index), + value_ptr(n, index + 1), nr_to_copy * value_size); } @@ -166,7 +166,7 @@ static int init_child(struct dm_btree_info *info, struct node *parent, if (inc) inc_children(info->tm, result->n, &le64_type); - *((__le64 *) value_ptr(parent, index, sizeof(__le64))) = + *((__le64 *) value_ptr(parent, index)) = cpu_to_le64(dm_block_location(result->block)); return 0; @@ -520,7 +520,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, */ if (shadow_has_parent(s)) { __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); - memcpy(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(__le64)), + memcpy(value_ptr(dm_block_data(shadow_parent(s)), i), &location, sizeof(__le64)); } @@ -577,7 +577,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, if (info->value_type.dec) info->value_type.dec(info->value_type.context, - value_ptr(n, index, info->value_type.size)); + value_ptr(n, index)); delete_at(n, index); } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index bd1e7ffbe26c..d12b2cc51f1a 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -74,8 +74,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n, dm_tm_inc(tm, value64(n, i)); else if (vt->inc) for (i = 0; i < nr_entries; i++) - vt->inc(vt->context, - value_ptr(n, i, vt->size)); + vt->inc(vt->context, value_ptr(n, i)); } static int insert_at(size_t value_size, struct node *node, unsigned index, @@ -281,7 +280,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) for (i = 0; i < f->nr_children; i++) info->value_type.dec(info->value_type.context, - value_ptr(f->n, i, info->value_type.size)); + value_ptr(f->n, i)); } f->current_child = f->nr_children; } @@ -320,7 +319,7 @@ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, } while (!(flags & LEAF_NODE)); *result_key = le64_to_cpu(ro_node(s)->keys[i]); - memcpy(v, value_ptr(ro_node(s), i, value_size), value_size); + memcpy(v, value_ptr(ro_node(s), i), value_size); return 0; } @@ -432,7 +431,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ? sizeof(uint64_t) : s->info->value_type.size; - memcpy(value_ptr(rn, 0, size), value_ptr(ln, nr_left, size), + memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left), size * nr_right); /* @@ -443,7 +442,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, pn = dm_block_data(parent); location = cpu_to_le64(dm_block_location(left)); __dm_bless_for_disk(&location); - memcpy_disk(value_ptr(pn, parent_index, sizeof(__le64)), + memcpy_disk(value_ptr(pn, parent_index), &location, sizeof(__le64)); location = cpu_to_le64(dm_block_location(right)); @@ -529,8 +528,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? sizeof(__le64) : s->info->value_type.size; - memcpy(value_ptr(ln, 0, size), value_ptr(pn, 0, size), nr_left * size); - memcpy(value_ptr(rn, 0, size), value_ptr(pn, nr_left, size), + memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); + memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), nr_right * size); /* new_parent should just point to l and r now */ @@ -545,12 +544,12 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) val = cpu_to_le64(dm_block_location(left)); __dm_bless_for_disk(&val); pn->keys[0] = ln->keys[0]; - memcpy_disk(value_ptr(pn, 0, sizeof(__le64)), &val, sizeof(__le64)); + memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64)); val = cpu_to_le64(dm_block_location(right)); __dm_bless_for_disk(&val); pn->keys[1] = rn->keys[0]; - memcpy_disk(value_ptr(pn, 1, sizeof(__le64)), &val, sizeof(__le64)); + memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); /* * rejig the spine. This is ugly, since it knows too @@ -595,7 +594,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); __dm_bless_for_disk(&location); - memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i, sizeof(uint64_t)), + memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i), &location, sizeof(__le64)); } @@ -710,12 +709,12 @@ static int insert(struct dm_btree_info *info, dm_block_t root, (!info->value_type.equal || !info->value_type.equal( info->value_type.context, - value_ptr(n, index, info->value_type.size), + value_ptr(n, index), value))) { info->value_type.dec(info->value_type.context, - value_ptr(n, index, info->value_type.size)); + value_ptr(n, index)); } - memcpy_disk(value_ptr(n, index, info->value_type.size), + memcpy_disk(value_ptr(n, index), value, info->value_type.size); } -- cgit v1.2.3 From fef838cc1ac34e599c74888274506d76767f3098 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Mar 2012 18:41:25 +0100 Subject: dm thin metadata: pass correct space map to dm_sm_root_size Fix a harmless typo. The root is a chunk of data that gets written to the superblock. This data is used to recreate the space map when opening a metadata area. We have two space maps; one tracking space on the metadata device and one of the data device. Both of these use the same format for their root, so this typo was harmless. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 237571af77fd..a680c761341f 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -614,7 +614,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) goto out; - r = dm_sm_root_size(pmd->metadata_sm, &data_len); + r = dm_sm_root_size(pmd->data_sm, &data_len); if (r < 0) goto out; -- cgit v1.2.3 From 0447568fc51e0268e201f7086d2450cf986e0411 Mon Sep 17 00:00:00 2001 From: Jonathan E Brassow Date: Wed, 28 Mar 2012 18:41:26 +0100 Subject: dm raid: handle failed devices during start up The dm-raid code currently fails to create a RAID array if any of the superblocks cannot be read. This was an oversight as there is already code to handle this case if the values ('- -') were provided for the failed array position. With this patch, if a superblock cannot be read, the array position's fields are initialized as though '- -' was set in the table. That is, the device is failed and the position should not be used, but if there is sufficient redundancy, the array should still be activated. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c5a875d7b882..b0ba52459ed7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -604,7 +604,9 @@ static int read_disk_sb(struct md_rdev *rdev, int size) return 0; if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) { - DMERR("Failed to read device superblock"); + DMERR("Failed to read superblock of device at position %d", + rdev->raid_disk); + set_bit(Faulty, &rdev->flags); return -EINVAL; } @@ -855,9 +857,25 @@ static int super_validate(struct mddev *mddev, struct md_rdev *rdev) static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) { int ret; + unsigned redundancy = 0; + struct raid_dev *dev; struct md_rdev *rdev, *freshest; struct mddev *mddev = &rs->md; + switch (rs->raid_type->level) { + case 1: + redundancy = rs->md.raid_disks - 1; + break; + case 4: + case 5: + case 6: + redundancy = rs->raid_type->parity_devs; + break; + default: + ti->error = "Unknown RAID type"; + return -EINVAL; + } + freshest = NULL; rdev_for_each(rdev, mddev) { if (!rdev->meta_bdev) @@ -872,6 +890,37 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) case 0: break; default: + dev = container_of(rdev, struct raid_dev, rdev); + if (redundancy--) { + if (dev->meta_dev) + dm_put_device(ti, dev->meta_dev); + + dev->meta_dev = NULL; + rdev->meta_bdev = NULL; + + if (rdev->sb_page) + put_page(rdev->sb_page); + + rdev->sb_page = NULL; + + rdev->sb_loaded = 0; + + /* + * We might be able to salvage the data device + * even though the meta device has failed. For + * now, we behave as though '- -' had been + * set for this device in the table. + */ + if (dev->data_dev) + dm_put_device(ti, dev->data_dev); + + dev->data_dev = NULL; + rdev->bdev = NULL; + + list_del(&rdev->same_set); + + continue; + } ti->error = "Failed to load superblock"; return ret; } @@ -1214,7 +1263,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.2.3 From 31998ef19385c944600d9a981b96252f98204bee Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 28 Mar 2012 18:41:26 +0100 Subject: dm: reject trailing characters in sccanf input Device mapper uses sscanf to convert arguments to numbers. The problem is that the way we use it ignores additional unmatched characters in the scanned string. For example, this `if (sscanf(string, "%d", &number) == 1)' will match a number, but also it will match number with some garbage appended, like "123abc". As a result, device mapper accepts garbage after some numbers. For example the command `dmsetup create vg1-new --table "0 16384 linear 254:1bla 34816bla"' will pass without an error. This patch fixes all sscanf uses in device mapper. It appends "%c" with a pointer to a dummy character variable to every sscanf statement. The construct `if (sscanf(string, "%d%c", &number, &dummy) == 1)' succeeds only if string is a null-terminated number (optionally preceded by some whitespace characters). If there is some character appended after the number, sscanf matches "%c", writes the character to the dummy variable and returns 2. We check the return value for 1 and consequently reject numbers with some garbage appended. Signed-off-by: Mikulas Patocka Acked-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 8 +++++--- drivers/md/dm-delay.c | 9 +++++---- drivers/md/dm-flakey.c | 3 ++- drivers/md/dm-ioctl.c | 5 +++-- drivers/md/dm-linear.c | 3 ++- drivers/md/dm-log.c | 3 ++- drivers/md/dm-mpath.c | 6 ++++-- drivers/md/dm-queue-length.c | 3 ++- drivers/md/dm-raid1.c | 12 ++++++++---- drivers/md/dm-round-robin.c | 3 ++- drivers/md/dm-service-time.c | 5 +++-- drivers/md/dm-stripe.c | 3 ++- drivers/md/dm-table.c | 6 ++++-- 13 files changed, 44 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 87de0d6b49f2..3f06df59fd82 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1415,6 +1415,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; char *cipher_api = NULL; int cpu, ret = -EINVAL; + char dummy; /* Convert to crypto api definition? */ if (strchr(cipher_in, '(')) { @@ -1436,7 +1437,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, if (!keycount) cc->tfms_count = 1; - else if (sscanf(keycount, "%u", &cc->tfms_count) != 1 || + else if (sscanf(keycount, "%u%c", &cc->tfms_count, &dummy) != 1 || !is_power_of_2(cc->tfms_count)) { ti->error = "Bad cipher key count specification"; return -EINVAL; @@ -1581,6 +1582,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) int ret; struct dm_arg_set as; const char *opt_string; + char dummy; static struct dm_arg _args[] = { {0, 1, "Invalid number of feature args"}, @@ -1638,7 +1640,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ret = -EINVAL; - if (sscanf(argv[2], "%llu", &tmpll) != 1) { + if (sscanf(argv[2], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid iv_offset sector"; goto bad; } @@ -1649,7 +1651,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - if (sscanf(argv[4], "%llu", &tmpll) != 1) { + if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index f18375dcedd9..2dc22dddb2ae 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -131,6 +131,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct delay_c *dc; unsigned long long tmpll; + char dummy; if (argc != 3 && argc != 6) { ti->error = "requires exactly 3 or 6 arguments"; @@ -145,13 +146,13 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) dc->reads = dc->writes = 0; - if (sscanf(argv[1], "%llu", &tmpll) != 1) { + if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid device sector"; goto bad; } dc->start_read = tmpll; - if (sscanf(argv[2], "%u", &dc->read_delay) != 1) { + if (sscanf(argv[2], "%u%c", &dc->read_delay, &dummy) != 1) { ti->error = "Invalid delay"; goto bad; } @@ -166,13 +167,13 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (argc == 3) goto out; - if (sscanf(argv[4], "%llu", &tmpll) != 1) { + if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid write device sector"; goto bad_dev_read; } dc->start_write = tmpll; - if (sscanf(argv[5], "%u", &dc->write_delay) != 1) { + if (sscanf(argv[5], "%u%c", &dc->write_delay, &dummy) != 1) { ti->error = "Invalid write delay"; goto bad_dev_read; } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b280c433e4a0..ac49c01f1a44 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -160,6 +160,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) unsigned long long tmpll; struct dm_arg_set as; const char *devname; + char dummy; as.argc = argc; as.argv = argv; @@ -178,7 +179,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) devname = dm_shift_arg(&as); - if (sscanf(dm_shift_arg(&as), "%llu", &tmpll) != 1) { + if (sscanf(dm_shift_arg(&as), "%llu%c", &tmpll, &dummy) != 1) { ti->error = "Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1ce84ed0b765..a1a3e6df17b8 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -880,6 +880,7 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) struct hd_geometry geometry; unsigned long indata[4]; char *geostr = (char *) param + param->data_start; + char dummy; md = find_device(param); if (!md) @@ -891,8 +892,8 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) goto out; } - x = sscanf(geostr, "%lu %lu %lu %lu", indata, - indata + 1, indata + 2, indata + 3); + x = sscanf(geostr, "%lu %lu %lu %lu%c", indata, + indata + 1, indata + 2, indata + 3, &dummy); if (x != 4) { DMWARN("Unable to interpret geometry settings."); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 9728839f844a..3639eeab6042 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -29,6 +29,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct linear_c *lc; unsigned long long tmp; + char dummy; if (argc != 2) { ti->error = "Invalid argument count"; @@ -41,7 +42,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -ENOMEM; } - if (sscanf(argv[1], "%llu", &tmp) != 1) { + if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) { ti->error = "dm-linear: Invalid device sector"; goto bad; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 3b52bb72bd1f..65ebaebf502b 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -369,6 +369,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, unsigned int region_count; size_t bitset_size, buf_size; int r; + char dummy; if (argc < 1 || argc > 2) { DMWARN("wrong number of arguments to dirty region log"); @@ -387,7 +388,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, } } - if (sscanf(argv[0], "%u", ®ion_size) != 1 || + if (sscanf(argv[0], "%u%c", ®ion_size, &dummy) != 1 || !_check_region_size(ti, region_size)) { DMWARN("invalid region size %s", argv[0]); return -EINVAL; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e92a0005eeff..922a3385eead 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1070,8 +1070,9 @@ static int switch_pg_num(struct multipath *m, const char *pgstr) struct priority_group *pg; unsigned pgnum; unsigned long flags; + char dummy; - if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum || + if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || (pgnum > m->nr_priority_groups)) { DMWARN("invalid PG number supplied to switch_pg_num"); return -EINVAL; @@ -1101,8 +1102,9 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed) { struct priority_group *pg; unsigned pgnum; + char dummy; - if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum || + if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || (pgnum > m->nr_priority_groups)) { DMWARN("invalid PG number supplied to bypass_pg"); return -EINVAL; diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-queue-length.c index 03a837aa5ce6..3941fae0de9f 100644 --- a/drivers/md/dm-queue-length.c +++ b/drivers/md/dm-queue-length.c @@ -112,6 +112,7 @@ static int ql_add_path(struct path_selector *ps, struct dm_path *path, struct selector *s = ps->context; struct path_info *pi; unsigned repeat_count = QL_MIN_IO; + char dummy; /* * Arguments: [] @@ -123,7 +124,7 @@ static int ql_add_path(struct path_selector *ps, struct dm_path *path, return -EINVAL; } - if ((argc == 1) && (sscanf(argv[0], "%u", &repeat_count) != 1)) { + if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { *error = "queue-length ps: invalid repeat count"; return -EINVAL; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9bfd057be686..d039de8322f0 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -924,8 +924,9 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, unsigned int mirror, char **argv) { unsigned long long offset; + char dummy; - if (sscanf(argv[1], "%llu", &offset) != 1) { + if (sscanf(argv[1], "%llu%c", &offset, &dummy) != 1) { ti->error = "Invalid offset"; return -EINVAL; } @@ -953,13 +954,14 @@ static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, { unsigned param_count; struct dm_dirty_log *dl; + char dummy; if (argc < 2) { ti->error = "Insufficient mirror log arguments"; return NULL; } - if (sscanf(argv[1], "%u", ¶m_count) != 1) { + if (sscanf(argv[1], "%u%c", ¶m_count, &dummy) != 1) { ti->error = "Invalid mirror log argument count"; return NULL; } @@ -986,13 +988,14 @@ static int parse_features(struct mirror_set *ms, unsigned argc, char **argv, { unsigned num_features; struct dm_target *ti = ms->ti; + char dummy; *args_used = 0; if (!argc) return 0; - if (sscanf(argv[0], "%u", &num_features) != 1) { + if (sscanf(argv[0], "%u%c", &num_features, &dummy) != 1) { ti->error = "Invalid number of features"; return -EINVAL; } @@ -1036,6 +1039,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) unsigned int nr_mirrors, m, args_used; struct mirror_set *ms; struct dm_dirty_log *dl; + char dummy; dl = create_dirty_log(ti, argc, argv, &args_used); if (!dl) @@ -1044,7 +1048,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv += args_used; argc -= args_used; - if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || + if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 || nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { ti->error = "Invalid number of mirrors"; dm_dirty_log_destroy(dl); diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 27f1d423b76c..6ab1192cdd5f 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c @@ -114,6 +114,7 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path, struct selector *s = (struct selector *) ps->context; struct path_info *pi; unsigned repeat_count = RR_MIN_IO; + char dummy; if (argc > 1) { *error = "round-robin ps: incorrect number of arguments"; @@ -121,7 +122,7 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path, } /* First path argument is number of I/Os before switching path */ - if ((argc == 1) && (sscanf(argv[0], "%u", &repeat_count) != 1)) { + if ((argc == 1) && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { *error = "round-robin ps: invalid repeat count"; return -EINVAL; } diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-service-time.c index 59883bd78214..9df8f6bd6418 100644 --- a/drivers/md/dm-service-time.c +++ b/drivers/md/dm-service-time.c @@ -110,6 +110,7 @@ static int st_add_path(struct path_selector *ps, struct dm_path *path, struct path_info *pi; unsigned repeat_count = ST_MIN_IO; unsigned relative_throughput = 1; + char dummy; /* * Arguments: [ []] @@ -128,13 +129,13 @@ static int st_add_path(struct path_selector *ps, struct dm_path *path, return -EINVAL; } - if (argc && (sscanf(argv[0], "%u", &repeat_count) != 1)) { + if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) { *error = "service-time ps: invalid repeat count"; return -EINVAL; } if ((argc == 2) && - (sscanf(argv[1], "%u", &relative_throughput) != 1 || + (sscanf(argv[1], "%u%c", &relative_throughput, &dummy) != 1 || relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) { *error = "service-time ps: invalid relative_throughput value"; return -EINVAL; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 3d80cf0c152d..35c94ff24ad5 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -75,8 +75,9 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, unsigned int stripe, char **argv) { unsigned long long start; + char dummy; - if (sscanf(argv[1], "%llu", &start) != 1) + if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1) return -EINVAL; if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index a3d1e18317f4..2e227fbf1622 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -463,10 +463,11 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, struct dm_dev_internal *dd; unsigned int major, minor; struct dm_table *t = ti->table; + char dummy; BUG_ON(!t); - if (sscanf(path, "%u:%u", &major, &minor) == 2) { + if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) { /* Extract the major/minor numbers */ dev = MKDEV(major, minor); if (MAJOR(dev) != major || MINOR(dev) != minor) @@ -841,9 +842,10 @@ static int validate_next_arg(struct dm_arg *arg, struct dm_arg_set *arg_set, unsigned *value, char **error, unsigned grouped) { const char *arg_str = dm_shift_arg(arg_set); + char dummy; if (!arg_str || - (sscanf(arg_str, "%u", value) != 1) || + (sscanf(arg_str, "%u%c", value, &dummy) != 1) || (*value < arg->min) || (*value > arg->max) || (grouped && arg_set->argc < *value)) { -- cgit v1.2.3 From 905e51b39a5558706a6ed883fe104de3d417050b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Mar 2012 18:41:27 +0100 Subject: dm thin: commit outstanding data every second Commit unwritten data every second to prevent too much building up. Released blocks don't become available until after the next commit (for crash resilience). Prior to this patch commits were only triggered by a message to the target or a REQ_{FLUSH,FUA} bio. This allowed far too big a position to build up. The interval is hard-coded to 1 second. This is a sensible setting. I'm not making this user configurable, since there isn't much to be gained by tweaking this - and a lot lost by setting it far too high. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 1791134cf477..bcb143396fe0 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -23,6 +23,7 @@ #define DEFERRED_SET_SIZE 64 #define MAPPING_POOL_SIZE 1024 #define PRISON_CELLS 1024 +#define COMMIT_PERIOD HZ /* * The block size of the device holding pool data must be @@ -520,8 +521,10 @@ struct pool { struct workqueue_struct *wq; struct work_struct worker; + struct delayed_work waker; unsigned ref_count; + unsigned long last_commit_jiffies; spinlock_t lock; struct bio_list deferred_bios; @@ -1271,6 +1274,12 @@ static void process_bio(struct thin_c *tc, struct bio *bio) } } +static int need_commit_due_to_time(struct pool *pool) +{ + return jiffies < pool->last_commit_jiffies || + jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; +} + static void process_deferred_bios(struct pool *pool) { unsigned long flags; @@ -1312,7 +1321,7 @@ static void process_deferred_bios(struct pool *pool) bio_list_init(&pool->deferred_flush_bios); spin_unlock_irqrestore(&pool->lock, flags); - if (bio_list_empty(&bios)) + if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) return; r = dm_pool_commit_metadata(pool->pmd); @@ -1323,6 +1332,7 @@ static void process_deferred_bios(struct pool *pool) bio_io_error(bio); return; } + pool->last_commit_jiffies = jiffies; while ((bio = bio_list_pop(&bios))) generic_make_request(bio); @@ -1336,6 +1346,17 @@ static void do_worker(struct work_struct *ws) process_deferred_bios(pool); } +/* + * We want to commit periodically so that not too much + * unwritten data builds up. + */ +static void do_waker(struct work_struct *ws) +{ + struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker); + wake_worker(pool); + queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); +} + /*----------------------------------------------------------------*/ /* @@ -1545,6 +1566,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, } INIT_WORK(&pool->worker, do_worker); + INIT_DELAYED_WORK(&pool->waker, do_waker); spin_lock_init(&pool->lock); bio_list_init(&pool->deferred_bios); bio_list_init(&pool->deferred_flush_bios); @@ -1571,6 +1593,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_endio_hook_pool; } pool->ref_count = 1; + pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; pool->md_dev = metadata_dev; __pool_table_insert(pool); @@ -1900,7 +1923,7 @@ static void pool_resume(struct dm_target *ti) __requeue_bios(pool); spin_unlock_irqrestore(&pool->lock, flags); - wake_worker(pool); + do_waker(&pool->waker.work); } static void pool_postsuspend(struct dm_target *ti) @@ -1909,6 +1932,7 @@ static void pool_postsuspend(struct dm_target *ti) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; + cancel_delayed_work(&pool->waker); flush_workqueue(pool->wq); r = dm_pool_commit_metadata(pool->pmd); -- cgit v1.2.3 From 71fd5ae25d88841c08d5bbea90c0f0a12ca05509 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 28 Mar 2012 18:41:27 +0100 Subject: dm persistent data: remove space map ref_count entries if redundant Save space by removing entries from the space map ref_count tree if they're no longer needed. Ref counts are stored in two places: a bitmap if the ref_count is below 3, or a btree of uint32_t if 3 or above. When a ref_count that was above 3 drops below we can remove it from the tree and save some metadata space. This removal was commented out before because I was unsure why this was causing under-populated btree nodes. Earlier patches have fixed this issue. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-space-map-common.c | 3 --- 1 file chang