From 742c8fdc31e820503f9267070311d894978d1349 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Oct 2016 10:06:40 -0400 Subject: dm bio prison v2: new interface for the bio prison The deferred set is gone and all methods have _v2 appended to the end of their names to allow for continued use of the original bio prison in DM thin-provisioning. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/Makefile | 1 + drivers/md/dm-bio-prison-v1.c | 464 ++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm-bio-prison-v1.h | 138 +++++++++++++ drivers/md/dm-bio-prison-v2.c | 369 +++++++++++++++++++++++++++++++++ drivers/md/dm-bio-prison-v2.h | 152 ++++++++++++++ drivers/md/dm-bio-prison.c | 424 -------------------------------------- drivers/md/dm-bio-prison.h | 138 ------------- drivers/md/dm-cache-target.c | 2 +- drivers/md/dm-thin.c | 2 +- 9 files changed, 1126 insertions(+), 564 deletions(-) create mode 100644 drivers/md/dm-bio-prison-v1.c create mode 100644 drivers/md/dm-bio-prison-v1.h create mode 100644 drivers/md/dm-bio-prison-v2.c create mode 100644 drivers/md/dm-bio-prison-v2.h delete mode 100644 drivers/md/dm-bio-prison.c delete mode 100644 drivers/md/dm-bio-prison.h (limited to 'drivers') diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 3cbda1af87a0..d378b1db7852 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -11,6 +11,7 @@ dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-mirror-y += dm-raid1.o dm-log-userspace-y \ += dm-log-userspace-base.o dm-log-userspace-transfer.o +dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o dm-thin-pool-y += dm-thin.o dm-thin-metadata.o dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o dm-cache-smq-y += dm-cache-policy-smq.o diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c new file mode 100644 index 000000000000..ae7da2c30a57 --- /dev/null +++ b/drivers/md/dm-bio-prison-v1.c @@ -0,0 +1,464 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison-v1.h" +#include "dm-bio-prison-v2.h" + +#include +#include +#include +#include + +/*----------------------------------------------------------------*/ + +#define MIN_CELLS 1024 + +struct dm_bio_prison { + spinlock_t lock; + mempool_t *cell_pool; + struct rb_root cells; +}; + +static struct kmem_cache *_cell_cache; + +/*----------------------------------------------------------------*/ + +/* + * @nr_cells should be the number of cells you want in use _concurrently_. + * Don't confuse it with the number of distinct keys. + */ +struct dm_bio_prison *dm_bio_prison_create(void) +{ + struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + + if (!prison) + return NULL; + + spin_lock_init(&prison->lock); + + prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); + if (!prison->cell_pool) { + kfree(prison); + return NULL; + } + + prison->cells = RB_ROOT; + + return prison; +} +EXPORT_SYMBOL_GPL(dm_bio_prison_create); + +void dm_bio_prison_destroy(struct dm_bio_prison *prison) +{ + mempool_destroy(prison->cell_pool); + kfree(prison); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_destroy); + +struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp) +{ + return mempool_alloc(prison->cell_pool, gfp); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell); + +void dm_bio_prison_free_cell(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell) +{ + mempool_free(cell, prison->cell_pool); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); + +static void __setup_new_cell(struct dm_cell_key *key, + struct bio *holder, + struct dm_bio_prison_cell *cell) +{ + memcpy(&cell->key, key, sizeof(cell->key)); + cell->holder = holder; + bio_list_init(&cell->bios); +} + +static int cmp_keys(struct dm_cell_key *lhs, + struct dm_cell_key *rhs) +{ + if (lhs->virtual < rhs->virtual) + return -1; + + if (lhs->virtual > rhs->virtual) + return 1; + + if (lhs->dev < rhs->dev) + return -1; + + if (lhs->dev > rhs->dev) + return 1; + + if (lhs->block_end <= rhs->block_begin) + return -1; + + if (lhs->block_begin >= rhs->block_end) + return 1; + + return 0; +} + +static int __bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + int r; + struct rb_node **new = &prison->cells.rb_node, *parent = NULL; + + while (*new) { + struct dm_bio_prison_cell *cell = + container_of(*new, struct dm_bio_prison_cell, node); + + r = cmp_keys(key, &cell->key); + + parent = *new; + if (r < 0) + new = &((*new)->rb_left); + else if (r > 0) + new = &((*new)->rb_right); + else { + if (inmate) + bio_list_add(&cell->bios, inmate); + *cell_result = cell; + return 1; + } + } + + __setup_new_cell(key, inmate, cell_prealloc); + *cell_result = cell_prealloc; + + rb_link_node(&cell_prealloc->node, parent, new); + rb_insert_color(&cell_prealloc->node, &prison->cells); + + return 0; +} + +static int bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} + +int dm_bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + return bio_detain(prison, key, inmate, cell_prealloc, cell_result); +} +EXPORT_SYMBOL_GPL(dm_bio_detain); + +int dm_get_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + return bio_detain(prison, key, NULL, cell_prealloc, cell_result); +} +EXPORT_SYMBOL_GPL(dm_get_cell); + +/* + * @inmates must have been initialised prior to this call + */ +static void __cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates) +{ + rb_erase(&cell->node, &prison->cells); + + if (inmates) { + if (cell->holder) + bio_list_add(inmates, cell->holder); + bio_list_merge(inmates, &cell->bios); + } +} + +void dm_cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *bios) +{ + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release(prison, cell, bios); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release); + +/* + * Sometimes we don't want the holder, just the additional bios. + */ +static void __cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates) +{ + rb_erase(&cell->node, &prison->cells); + bio_list_merge(inmates, &cell->bios); +} + +void dm_cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates) +{ + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release_no_holder(prison, cell, inmates); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); + +void dm_cell_error(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, int error) +{ + struct bio_list bios; + struct bio *bio; + + bio_list_init(&bios); + dm_cell_release(prison, cell, &bios); + + while ((bio = bio_list_pop(&bios))) { + bio->bi_error = error; + bio_endio(bio); + } +} +EXPORT_SYMBOL_GPL(dm_cell_error); + +void dm_cell_visit_release(struct dm_bio_prison *prison, + void (*visit_fn)(void *, struct dm_bio_prison_cell *), + void *context, + struct dm_bio_prison_cell *cell) +{ + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + visit_fn(context, cell); + rb_erase(&cell->node, &prison->cells); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_visit_release); + +static int __promote_or_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell) +{ + if (bio_list_empty(&cell->bios)) { + rb_erase(&cell->node, &prison->cells); + return 1; + } + + cell->holder = bio_list_pop(&cell->bios); + return 0; +} + +int dm_cell_promote_or_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __promote_or_release(prison, cell); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_promote_or_release); + +/*----------------------------------------------------------------*/ + +#define DEFERRED_SET_SIZE 64 + +struct dm_deferred_entry { + struct dm_deferred_set *ds; + unsigned count; + struct list_head work_items; +}; + +struct dm_deferred_set { + spinlock_t lock; + unsigned current_entry; + unsigned sweeper; + struct dm_deferred_entry entries[DEFERRED_SET_SIZE]; +}; + +struct dm_deferred_set *dm_deferred_set_create(void) +{ + int i; + struct dm_deferred_set *ds; + + ds = kmalloc(sizeof(*ds), GFP_KERNEL); + if (!ds) + return NULL; + + spin_lock_init(&ds->lock); + ds->current_entry = 0; + ds->sweeper = 0; + for (i = 0; i < DEFERRED_SET_SIZE; i++) { + ds->entries[i].ds = ds; + ds->entries[i].count = 0; + INIT_LIST_HEAD(&ds->entries[i].work_items); + } + + return ds; +} +EXPORT_SYMBOL_GPL(dm_deferred_set_create); + +void dm_deferred_set_destroy(struct dm_deferred_set *ds) +{ + kfree(ds); +} +EXPORT_SYMBOL_GPL(dm_deferred_set_destroy); + +struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds) +{ + unsigned long flags; + struct dm_deferred_entry *entry; + + spin_lock_irqsave(&ds->lock, flags); + entry = ds->entries + ds->current_entry; + entry->count++; + spin_unlock_irqrestore(&ds->lock, flags); + + return entry; +} +EXPORT_SYMBOL_GPL(dm_deferred_entry_inc); + +static unsigned ds_next(unsigned index) +{ + return (index + 1) % DEFERRED_SET_SIZE; +} + +static void __sweep(struct dm_deferred_set *ds, struct list_head *head) +{ + while ((ds->sweeper != ds->current_entry) && + !ds->entries[ds->sweeper].count) { + list_splice_init(&ds->entries[ds->sweeper].work_items, head); + ds->sweeper = ds_next(ds->sweeper); + } + + if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) + list_splice_init(&ds->entries[ds->sweeper].work_items, head); +} + +void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head) +{ + unsigned long flags; + + spin_lock_irqsave(&entry->ds->lock, flags); + BUG_ON(!entry->count); + --entry->count; + __sweep(entry->ds, head); + spin_unlock_irqrestore(&entry->ds->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_deferred_entry_dec); + +/* + * Returns 1 if deferred or 0 if no pending items to delay job. + */ +int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work) +{ + int r = 1; + unsigned long flags; + unsigned next_entry; + + spin_lock_irqsave(&ds->lock, flags); + if ((ds->sweeper == ds->current_entry) && + !ds->entries[ds->current_entry].count) + r = 0; + else { + list_add(work, &ds->entries[ds->current_entry].work_items); + next_entry = ds_next(ds->current_entry); + if (!ds->entries[next_entry].count) + ds->current_entry = next_entry; + } + spin_unlock_irqrestore(&ds->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_deferred_set_add_work); + +/*----------------------------------------------------------------*/ + +static int __init dm_bio_prison_init_v1(void) +{ + _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); + if (!_cell_cache) + return -ENOMEM; + + return 0; +} + +static void dm_bio_prison_exit_v1(void) +{ + kmem_cache_destroy(_cell_cache); + _cell_cache = NULL; +} + +static int (*_inits[])(void) __initdata = { + dm_bio_prison_init_v1, + dm_bio_prison_init_v2, +}; + +static void (*_exits[])(void) = { + dm_bio_prison_exit_v1, + dm_bio_prison_exit_v2, +}; + +static int __init dm_bio_prison_init(void) +{ + const int count = ARRAY_SIZE(_inits); + + int r, i; + + for (i = 0; i < count; i++) { + r = _inits[i](); + if (r) + goto bad; + } + + return 0; + + bad: + while (i--) + _exits[i](); + + return r; +} + +static void __exit dm_bio_prison_exit(void) +{ + int i = ARRAY_SIZE(_exits); + + while (i--) + _exits[i](); +} + +/* + * module hooks + */ +module_init(dm_bio_prison_init); +module_exit(dm_bio_prison_exit); + +MODULE_DESCRIPTION(DM_NAME " bio prison"); +MODULE_AUTHOR("Joe Thornber "); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-bio-prison-v1.h b/drivers/md/dm-bio-prison-v1.h new file mode 100644 index 000000000000..cddd4ac07e2c --- /dev/null +++ b/drivers/md/dm-bio-prison-v1.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2011-2017 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_BIO_PRISON_H +#define DM_BIO_PRISON_H + +#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ +#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ + +#include +#include + +/*----------------------------------------------------------------*/ + +/* + * Sometimes we can't deal with a bio straight away. We put them in prison + * where they can't cause any mischief. Bios are put in a cell identified + * by a key, multiple bios can be in the same cell. When the cell is + * subsequently unlocked the bios become available. + */ +struct dm_bio_prison; + +/* + * Keys define a range of blocks within either a virtual or physical + * device. + */ +struct dm_cell_key { + int virtual; + dm_thin_id dev; + dm_block_t block_begin, block_end; +}; + +/* + * Treat this as opaque, only in header so callers can manage allocation + * themselves. + */ +struct dm_bio_prison_cell { + struct list_head user_list; /* for client use */ + struct rb_node node; + + struct dm_cell_key key; + struct bio *holder; + struct bio_list bios; +}; + +struct dm_bio_prison *dm_bio_prison_create(void); +void dm_bio_prison_destroy(struct dm_bio_prison *prison); + +/* + * These two functions just wrap a mempool. This is a transitory step: + * Eventually all bio prison clients should manage their own cell memory. + * + * Like mempool_alloc(), dm_bio_prison_alloc_cell() can only fail if called + * in interrupt context or passed GFP_NOWAIT. + */ +struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, + gfp_t gfp); +void dm_bio_prison_free_cell(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell); + +/* + * Creates, or retrieves a cell that overlaps the given key. + * + * Returns 1 if pre-existing cell returned, zero if new cell created using + * @cell_prealloc. + */ +int dm_get_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result); + +/* + * An atomic op that combines retrieving or creating a cell, and adding a + * bio to it. + * + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + */ +int dm_bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result); + +void dm_cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *bios); +void dm_cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates); +void dm_cell_error(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, int error); + +/* + * Visits the cell and then releases. Guarantees no new inmates are + * inserted between the visit and release. + */ +void dm_cell_visit_release(struct dm_bio_prison *prison, + void (*visit_fn)(void *, struct dm_bio_prison_cell *), + void *context, struct dm_bio_prison_cell *cell); + +/* + * Rather than always releasing the prisoners in a cell, the client may + * want to promote one of them to be the new holder. There is a race here + * though between releasing an empty cell, and other threads adding new + * inmates. So this function makes the decision with its lock held. + * + * This function can have two outcomes: + * i) An inmate is promoted to be the holder of the cell (return value of 0). + * ii) The cell has no inmate for promotion and is released (return value of 1). + */ +int dm_cell_promote_or_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell); + +/*----------------------------------------------------------------*/ + +/* + * We use the deferred set to keep track of pending reads to shared blocks. + * We do this to ensure the new mapping caused by a write isn't performed + * until these prior reads have completed. Otherwise the insertion of the + * new mapping could free the old block that the read bios are mapped to. + */ + +struct dm_deferred_set; +struct dm_deferred_entry; + +struct dm_deferred_set *dm_deferred_set_create(void); +void dm_deferred_set_destroy(struct dm_deferred_set *ds); + +struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds); +void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head); +int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work); + +/*----------------------------------------------------------------*/ + +#endif diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c new file mode 100644 index 000000000000..c9b11f799cd8 --- /dev/null +++ b/drivers/md/dm-bio-prison-v2.c @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2012-2017 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison-v2.h" + +#include +#include +#include +#include +#include + +/*----------------------------------------------------------------*/ + +#define MIN_CELLS 1024 + +struct dm_bio_prison_v2 { + struct workqueue_struct *wq; + + spinlock_t lock; + mempool_t *cell_pool; + struct rb_root cells; +}; + +static struct kmem_cache *_cell_cache; + +/*----------------------------------------------------------------*/ + +/* + * @nr_cells should be the number of cells you want in use _concurrently_. + * Don't confuse it with the number of distinct keys. + */ +struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq) +{ + struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + + if (!prison) + return NULL; + + prison->wq = wq; + spin_lock_init(&prison->lock); + + prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); + if (!prison->cell_pool) { + kfree(prison); + return NULL; + } + + prison->cells = RB_ROOT; + + return prison; +} +EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2); + +void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison) +{ + mempool_destroy(prison->cell_pool); + kfree(prison); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2); + +struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp) +{ + return mempool_alloc(prison->cell_pool, gfp); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2); + +void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + mempool_free(cell, prison->cell_pool); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2); + +static void __setup_new_cell(struct dm_cell_key_v2 *key, + struct dm_bio_prison_cell_v2 *cell) +{ + memset(cell, 0, sizeof(*cell)); + memcpy(&cell->key, key, sizeof(cell->key)); + bio_list_init(&cell->bios); +} + +static int cmp_keys(struct dm_cell_key_v2 *lhs, + struct dm_cell_key_v2 *rhs) +{ + if (lhs->virtual < rhs->virtual) + return -1; + + if (lhs->virtual > rhs->virtual) + return 1; + + if (lhs->dev < rhs->dev) + return -1; + + if (lhs->dev > rhs->dev) + return 1; + + if (lhs->block_end <= rhs->block_begin) + return -1; + + if (lhs->block_begin >= rhs->block_end) + return 1; + + return 0; +} + +/* + * Returns true if node found, otherwise it inserts a new one. + */ +static bool __find_or_insert(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **result) +{ + int r; + struct rb_node **new = &prison->cells.rb_node, *parent = NULL; + + while (*new) { + struct dm_bio_prison_cell_v2 *cell = + container_of(*new, struct dm_bio_prison_cell_v2, node); + + r = cmp_keys(key, &cell->key); + + parent = *new; + if (r < 0) + new = &((*new)->rb_left); + + else if (r > 0) + new = &((*new)->rb_right); + + else { + *result = cell; + return true; + } + } + + __setup_new_cell(key, cell_prealloc); + *result = cell_prealloc; + rb_link_node(&cell_prealloc->node, parent, new); + rb_insert_color(&cell_prealloc->node, &prison->cells); + + return false; +} + +static bool __get(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell) +{ + if (__find_or_insert(prison, key, cell_prealloc, cell)) { + if ((*cell)->exclusive_lock) { + if (lock_level <= (*cell)->exclusive_level) { + bio_list_add(&(*cell)->bios, inmate); + return false; + } + } + + (*cell)->shared_count++; + + } else + (*cell)->shared_count = 1; + + return true; +} + +bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_get_v2); + +static bool __put(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + BUG_ON(!cell->shared_count); + cell->shared_count--; + + // FIXME: shared locks granted above the lock level could starve this + if (!cell->shared_count) { + if (cell->exclusive_lock){ + if (cell->quiesce_continuation) { + queue_work(prison->wq, cell->quiesce_continuation); + cell->quiesce_continuation = NULL; + } + } else { + rb_erase(&cell->node, &prison->cells); + return true; + } + } + + return false; +} + +bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + bool r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __put(prison, cell); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_put_v2); + +static int __lock(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + struct dm_bio_prison_cell_v2 *cell; + + if (__find_or_insert(prison, key, cell_prealloc, &cell)) { + if (cell->exclusive_lock) + return -EBUSY; + + cell->exclusive_lock = true; + cell->exclusive_level = lock_level; + *cell_result = cell; + + // FIXME: we don't yet know what level these shared locks + // were taken at, so have to quiesce them all. + return cell->shared_count > 0; + + } else { + cell = cell_prealloc; + cell->shared_count = 0; + cell->exclusive_lock = true; + cell->exclusive_level = lock_level; + *cell_result = cell; + } + + return 0; +} + +int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __lock(prison, key, lock_level, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_lock_v2); + +static void __quiesce(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation) +{ + if (!cell->shared_count) + queue_work(prison->wq, continuation); + else + cell->quiesce_continuation = continuation; +} + +void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation) +{ + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + __quiesce(prison, cell, continuation); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2); + +static int __promote(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level) +{ + if (!cell->exclusive_lock) + return -EINVAL; + + cell->exclusive_level = new_lock_level; + return cell->shared_count > 0; +} + +int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __promote(prison, cell, new_lock_level); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_lock_promote_v2); + +static bool __unlock(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios) +{ + BUG_ON(!cell->exclusive_lock); + + bio_list_merge(bios, &cell->bios); + bio_list_init(&cell->bios); + + if (cell->shared_count) { + cell->exclusive_lock = 0; + return false; + } + + rb_erase(&cell->node, &prison->cells); + return true; +} + +bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios) +{ + bool r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __unlock(prison, cell, bios); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_unlock_v2); + +/*----------------------------------------------------------------*/ + +int __init dm_bio_prison_init_v2(void) +{ + _cell_cache = KMEM_CACHE(dm_bio_prison_cell_v2, 0); + if (!_cell_cache) + return -ENOMEM; + + return 0; +} + +void dm_bio_prison_exit_v2(void) +{ + kmem_cache_destroy(_cell_cache); + _cell_cache = NULL; +} diff --git a/drivers/md/dm-bio-prison-v2.h b/drivers/md/dm-bio-prison-v2.h new file mode 100644 index 000000000000..6e04234268db --- /dev/null +++ b/drivers/md/dm-bio-prison-v2.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2011-2017 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_BIO_PRISON_V2_H +#define DM_BIO_PRISON_V2_H + +#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ +#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ + +#include +#include +#include + +/*----------------------------------------------------------------*/ + +int dm_bio_prison_init_v2(void); +void dm_bio_prison_exit_v2(void); + +/* + * Sometimes we can't deal with a bio straight away. We put them in prison + * where they can't cause any mischief. Bios are put in a cell identified + * by a key, multiple bios can be in the same cell. When the cell is + * subsequently unlocked the bios become available. + */ +struct dm_bio_prison_v2; + +/* + * Keys define a range of blocks within either a virtual or physical + * device. + */ +struct dm_cell_key_v2 { + int virtual; + dm_thin_id dev; + dm_block_t block_begin, block_end; +}; + +/* + * Treat this as opaque, only in header so callers can manage allocation + * themselves. + */ +struct dm_bio_prison_cell_v2 { + // FIXME: pack these + bool exclusive_lock; + unsigned exclusive_level; + unsigned shared_count; + struct work_struct *quiesce_continuation; + + struct rb_node node; + struct dm_cell_key_v2 key; + struct bio_list bios; +}; + +struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq); +void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison); + +/* + * These two functions just wrap a mempool. This is a transitory step: + * Eventually all bio prison clients should manage their own cell memory. + * + * Like mempool_alloc(), dm_bio_prison_alloc_cell_v2() can only fail if called + * in interrupt context or passed GFP_NOWAIT. + */ +struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, + gfp_t gfp); +void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell); + +/* + * Shared locks have a bio associated with them. + * + * If the lock is granted the caller can continue to use the bio, and must + * call dm_cell_put_v2() to drop the reference count when finished using it. + * + * If the lock cannot be granted then the bio will be tracked within the + * cell, and later given to the holder of the exclusive lock. + * + * See dm_cell_lock_v2() for discussion of the lock_level parameter. + * + * Compare *cell_result with cell_prealloc to see if the prealloc was used. + * If cell_prealloc was used then inmate wasn't added to it. + * + * Returns true if the lock is granted. + */ +bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result); + +/* + * Decrement the shared reference count for the lock. Returns true if + * returning ownership of the cell (ie. you should free it). + */ +bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell); + +/* + * Locks a cell. No associated bio. Exclusive locks get priority. These + * locks constrain whether the io locks are granted according to level. + * + * Shared locks will still be granted if the lock_level is > (not = to) the + * exclusive lock level. + * + * If an _exclusive_ lock is already held then -EBUSY is returned. + * + * Return values: + * < 0 - error + * 0 - locked; no quiescing needed + * 1 - locked; quiescing needed + */ +int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result); + +void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation); + +/* + * Promotes an _exclusive_ lock to a higher lock level. + * + * Return values: + * < 0 - error + * 0 - promoted; no quiescing needed + * 1 - promoted; quiescing needed + */ +int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level); + +/* + * Adds any held bios to the bio list. + * + * There may be shared locks still held at this point even if you quiesced + * (ie. different lock levels). + * + * Returns true if returning ownership of the cell (ie. you should free + * it). + */ +bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios); + +/*----------------------------------------------------------------*/ + +#endif diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c deleted file mode 100644 index 03af174485d3..000000000000 --- a/drivers/md/dm-bio-prison.c +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Copyright (C) 2012 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm.h" -#include "dm-bio-prison.h" - -#include -#include -#include -#include - -/*----------------------------------------------------------------*/ - -#define MIN_CELLS 1024 - -struct dm_bio_prison { - spinlock_t lock; - mempool_t *cell_pool; - struct rb_root cells; -}; - -static struct kmem_cache *_cell_cache; - -/*----------------------------------------------------------------*/ - -/* - * @nr_cells should be the number of cells you want in use _concurrently_. - * Don't confuse it with the number of distinct keys. - */ -struct dm_bio_prison *dm_bio_prison_create(void) -{ - struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); - - if (!prison) - return NULL; - - spin_lock_init(&prison->lock); - - prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); - if (!prison->cell_pool) { - kfree(prison); - return NULL; - } - - prison->cells = RB_ROOT; - - return prison; -} -EXPORT_SYMBOL_GPL(dm_bio_prison_create); - -void dm_bio_prison_destroy(struct dm_bio_prison *prison) -{ - mempool_destroy(prison->cell_pool); - kfree(prison); -} -EXPORT_SYMBOL_GPL(dm_bio_prison_destroy); - -struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp) -{ - return mempool_alloc(prison->cell_pool, gfp); -} -EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell); - -void dm_bio_prison_free_cell(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell) -{ - mempool_free(cell, prison->cell_pool); -} -EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); - -static void __setup_new_cell(struct dm_cell_key *key, - struct bio *holder, - struct dm_bio_prison_cell *cell) -{ - memcpy(&cell->key, key, sizeof(cell->key)); - cell->holder = holder; - bio_list_init(&cell->bios); -} - -static int cmp_keys(struct dm_cell_key *lhs, - struct dm_cell_key *rhs) -{ - if (lhs->virtual < rhs->virtual) - return -1; - - if (lhs->virtual > rhs->virtual) - return 1; - - if (lhs->dev < rhs->dev) - return -1; - - if (lhs->dev > rhs->dev) - return 1; - - if (lhs->block_end <= rhs->block_begin) - return -1; - - if (lhs->block_begin >= rhs->block_end) - return 1; - - return 0; -} - -static int __bio_detain(struct dm_bio_prison *prison, - struct dm_cell_key *key, - struct bio *inmate, - struct dm_bio_prison_cell *cell_prealloc, - struct dm_bio_prison_cell **cell_result) -{ - int r; - struct rb_node **new = &prison->cells.rb_node, *parent = NULL; - - while (*new) { - struct dm_bio_prison_cell *cell = - container_of(*new, struct dm_bio_prison_cell, node); - - r = cmp_keys(key, &cell->key); - - parent = *new; - if (r < 0) - new = &((*new)->rb_left); - else if (r > 0) - new = &((*new)->rb_right); - else { - if (inmate) - bio_list_add(&cell->bios, inmate); - *cell_result = cell; - return 1; - } - } - - __setup_new_cell(key, inmate, cell_prealloc); - *cell_result = cell_prealloc; - - rb_link_node(&cell_prealloc->node, parent, new); - rb_insert_color(&cell_prealloc->node, &prison->cells); - - return 0; -} - -static int bio_detain(struct dm_bio_prison *prison, - struct dm_cell_key *key, - struct bio *inmate, - struct dm_bio_prison_cell *cell_prealloc, - struct dm_bio_prison_cell **cell_result) -{ - int r; - unsigned long flags; - - spin_lock_irqsave(&prison->lock, flags); - r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); - spin_unlock_irqrestore(&prison->lock, flags); - - return r; -} - -int dm_bio_detain(struct dm_bio_prison *prison, - struct dm_cell_key *key, - struct bio *inmate, - struct dm_bio_prison_cell *cell_prealloc, - struct dm_bio_prison_cell **cell_result) -{ - return bio_detain(prison, key, inmate, cell_prealloc, cell_result); -} -EXPORT_SYMBOL_GPL(dm_bio_detain); - -int dm_get_cell(struct dm_bio_prison *prison, - struct dm_cell_key *key, - struct dm_bio_prison_cell *cell_prealloc, - struct dm_bio_prison_cell **cell_result) -{ - return bio_detain(prison, key, NULL, cell_prealloc, cell_result); -} -EXPORT_SYMBOL_GPL(dm_get_cell); - -/* - * @inmates must have been initialised prior to this call - */ -static void __cell_release(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, - struct bio_list *inmates) -{ - rb_erase(&cell->node, &prison->cells); - - if (inmates) { - if (cell->holder) - bio_list_add(inmates, cell->holder); - bio_list_merge(inmates, &cell->bios); - } -} - -void dm_cell_release(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, - struct bio_list *bios) -{ - unsigned long flags; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(prison, cell, bios); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_release); - -/* - * Sometimes we don't want the holder, just the additional bios. - */ -static void __cell_release_no_holder(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, - struct bio_list *inmates) -{ - rb_erase(&cell->node, &prison->cells); - bio_list_merge(inmates, &cell->bios); -} - -void dm_cell_release_no_holder(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, - struct bio_list *inmates) -{ - unsigned long flags; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_no_holder(prison, cell, inmates); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); - -void dm_cell_error(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, int error) -{ - struct bio_list bios; - struct bio *bio; - - bio_list_init(&bios); - dm_cell_release(prison, cell, &bios); - - while ((bio = bio_list_pop(&bios))) { - bio->bi_error = error; - bio_endio(bio); - } -} -EXPORT_SYMBOL_GPL(dm_cell_error); - -void dm_cell_visit_release(struct dm_bio_prison *prison, - void (*visit_fn)(void *, struct dm_bio_prison_cell *), - void *context, - struct dm_bio_prison_cell *cell) -{ - unsigned long flags; - - spin_lock_irqsave(&prison->lock, flags); - visit_fn(context, cell); - rb_erase(&cell->node, &prison->cells); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_visit_release); - -static int __promote_or_release(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell) -{ - if (bio_list_empty(&cell->bios)) { - rb_erase(&cell->node, &prison->cells); - return 1; - } - - cell->holder = bio_list_pop(&cell->bios); - return 0; -} - -int dm_cell_promote_or_release(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell) -{ - int r; - unsigned long flags; - - spin_lock_irqsave(&prison->lock, flags); - r = __promote_or_release(prison, cell); - spin_unlock_irqrestore(&prison->lock, flags); - - return r; -} -EXPORT_SYMBOL_GPL(dm_cell_promote_or_release); - -/*----------------------------------------------------------------*/ - -#define DEFERRED_SET_SIZE 64 - -struct dm_deferred_entry { - struct dm_deferred_set *ds; - unsigned count; - struct list_head work_items; -}; - -struct dm_deferred_set { - spinlock_t lock; - unsigned current_entry; - unsigned sweeper; - struct dm_deferred_entry entries[DEFERRED_SET_SIZE]; -}; - -struct dm_deferred_set *dm_deferred_set_create(void) -{ - int i; - struct dm_deferred_set *ds; - - ds = kmalloc(sizeof(*ds), GFP_KERNEL); - if (!ds) - return NULL; - - spin_lock_init(&ds->lock); - ds->current_entry = 0; - ds->sweeper = 0; - for (i = 0; i < DEFERRED_SET_SIZE; i++) { - ds->entries[i].ds = ds; - ds->entries[i].count = 0; - INIT_LIST_HEAD(&ds->entries[i].work_items); - } - - return ds; -} -EXPORT_SYMBOL_GPL(dm_deferred_set_create); - -void dm_deferred_set_destroy(struct dm_deferred_set *ds) -{ - kfree(ds); -} -EXPORT_SYMBOL_GPL(dm_deferred_set_destroy); - -struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds) -{ - unsigned long flags; - struct dm_deferred_entry *entry; - - spin_lock_irqsave(&ds->lock, flags); - entry = ds->entries + ds->current_entry; - entry->count++; - spin_unlock_irqrestore(&ds->lock, flags); - - return entry; -} -EXPORT_SYMBOL_GPL(dm_deferred_entry_inc); - -static unsigned ds_next(unsigned index) -{ - return (index + 1) % DEFERRED_SET_SIZE; -} - -static void __sweep(struct dm_deferred_set *ds, struct list_head *head) -{ - while ((ds->sweeper != ds->current_entry) && - !ds->entries[ds->sweeper].count) { - list_splice_init(&ds->entries[ds->sweeper].work_items, head); - ds->sweeper = ds_next(ds->sweeper); - } - - if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) - list_splice_init(&ds->entries[ds->sweeper].work_items, head); -} - -void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head) -{ - unsigned long flags; - - spin_lock_irqsave(&entry->ds->lock, flags); - BUG_ON(!entry->count); - --entry->count; - __sweep(entry->ds, head); - spin_unlock_irqrestore(&entry->ds->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_deferred_entry_dec); - -/* - * Returns 1 if deferred or 0 if no pending items to delay job. - */ -int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work) -{ - int r = 1; - unsigned long flags; - unsigned next_entry; - - spin_lock_irqsave(&ds->lock, flags); - if ((ds->sweeper == ds->current_entry) && - !ds->entries[ds->current_entry].count) - r = 0; - else { - list_add(work, &ds->entries[ds->current_entry].work_items); - next_entry = ds_next(ds->current_entry); - if (!ds->entries[next_entry].count) - ds->current_entry = next_entry; - } - spin_unlock_irqrestore(&ds->lock, flags); - - return r; -} -EXPORT_SYMBOL_GPL(dm_deferred_set_add_work); - -/*----------------------------------------------------------------*/ - -static int __init dm_bio_prison_init(void) -{ - _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); - if (!_cell_cache) - return -ENOMEM; - - return 0; -} - -static void __exit dm_bio_prison_exit(void) -{ - kmem_cache_destroy(_cell_cache); - _cell_cache = NULL; -} - -/* - * module hooks - */ -module_init(dm_bio_prison_init); -module_exit(dm_bio_prison_exit); - -MODULE_DESCRIPTION(DM_NAME " bio prison"); -MODULE_AUTHOR("Joe Thornber "); -MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h deleted file mode 100644 index 54352f009bfd..000000000000 --- a/drivers/md/dm-bio-prison.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (C) 2011-2012 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef DM_BIO_PRISON_H -#define DM_BIO_PRISON_H - -#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ -#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ - -#include -#include - -/*----------------------------------------------------------------*/ - -/* - * Sometimes we can't deal with a bio straight away. We put them in prison - * where they can't cause any mischief. Bios are put in a cell identified - * by a key, multiple bios can be in the same cell. When the cell is - * subsequently unlocked the bios become available. - */ -struct dm_bio_prison; - -/* - * Keys define a range of blocks within either a virtual or physical - * device. - */ -struct dm_cell_key { - int virtual; - dm_thin_id dev; - dm_block_t block_begin, block_end; -}; - -/* - * Treat this as opaque, only in header so callers can manage allocation - * themselves. - */ -struct dm_bio_prison_cell { - struct list_head user_list; /* for client use */ - struct rb_node node; - - struct dm_cell_key key; - struct bio *holder; - struct bio_list bios; -}; - -struct dm_bio_prison *dm_bio_prison_create(void); -void dm_bio_prison_destroy(struct dm_bio_prison *prison); - -/* - * These two functions just wrap a mempool. This is a transitory step: - * Eventually all bio prison clients should manage their own cell memory. - * - * Like mempool_alloc(), dm_bio_prison_alloc_cell() can only fail if called - * in interrupt context or passed GFP_NOWAIT. - */ -struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, - gfp_t gfp); -void dm_bio_prison_free_cell(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell); - -/* - * Creates, or retrieves a cell that overlaps the given key. - * - * Returns 1 if pre-existing cell returned, zero if new cell created using - * @cell_prealloc. - */ -int dm_get_cell(struct dm_bio_prison *prison, - struct dm_cell_key *key, - struct dm_bio_prison_cell *cell_prealloc, - struct dm_bio_prison_cell **cell_result); - -/* - * An atomic op that combines retrieving or creating a cell, and adding a - * bio to it. - * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. - */ -int dm_bio_detain(struct dm_bio_prison *prison, - struct dm_cell_key *key, - struct bio *inmate, - struct dm_bio_prison_cell *cell_prealloc, - struct dm_bio_prison_cell **cell_result); - -void dm_cell_release(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, - struct bio_list *bios); -void dm_cell_release_no_holder(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, - struct bio_list *inmates); -void dm_cell_error(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell, int error); - -/* - * Visits the cell and then releases. Guarantees no new inmates are - * inserted between the visit and release. - */ -void dm_cell_visit_release(struct dm_bio_prison *prison, - void (*visit_fn)(void *, struct dm_bio_prison_cell *), - void *context, struct dm_bio_prison_cell *cell); - -/* - * Rather than always releasing the prisoners in a cell, the client may - * want to promote one of them to be the new holder. There is a race here - * though between releasing an empty cell, and other threads adding new - * inmates. So this function makes the decision with its lock held. - * - * This function can have two outcomes: - * i) An inmate is promoted to be the holder of the cell (return value of 0). - * ii) The cell has no inmate for promotion and is released (return value of 1). - */ -int dm_cell_promote_or_release(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell); - -/*----------------------------------------------------------------*/ - -/* - * We use the deferred set to keep track of pending reads to shared blocks. - * We do this to ensure the new mapping caused by a write isn't performed - * until these prior reads have completed. Otherwise the insertion of the - * new mapping could free the old block that the read bios are mapped to. - */ - -struct dm_deferred_set; -struct dm_deferred_entry; - -struct dm_deferred_set *dm_deferred_set_create(void); -void dm_deferred_set_destroy(struct dm_deferred_set *ds); - -struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds); -void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head); -int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9c689b34e6e7..2eaa414e1509 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -5,7 +5,7 @@ */ #include "dm.h" -#include "dm-bio-prison.h" +#include "dm-bio-prison-v1.h" #include "dm-bio-record.h" #include "dm-cache-metadata.h" diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2b266a2b5035..9b3e2fcbfb1b 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -5,7 +5,7 @@ */ #include "dm-thin-metadata.h" -#include "dm-bio-prison.h" +#include "dm-bio-prison-v1.h" #include "dm.h" #include -- cgit v1.2.3 From b29d4986d0da1a27cd35917cdb433672f5c95d7f Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 15 Dec 2016 04:57:31 -0500 Subject: dm cache: significant rework to leverage dm-bio-prison-v2 The cache policy interfaces have been updated to work well with the new bio-prison v2 interface's ability to queue work immediately (promotion, demotion, etc) -- overriding benefit being reduced latency on processing IO through the cache. Previously such work would be left for the DM cache core to queue on various lists and then process in batches later -- this caused a serious delay in latency for IO driven by the cache. The background tracker code was factored out so that all cache policies can make use of it. Also, the "cleaner" policy has been removed and is now a variant of the smq policy that simply disallows migrations. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 8 - drivers/md/Makefile | 5 +- drivers/md/dm-cache-background-tracker.c | 238 +++ drivers/md/dm-cache-background-tracker.h | 46 + drivers/md/dm-cache-metadata.h | 2 + drivers/md/dm-cache-policy-cleaner.c | 469 ------ drivers/md/dm-cache-policy-internal.h | 76 +- drivers/md/dm-cache-policy-smq.c | 821 +++++----- drivers/md/dm-cache-policy.h | 187 +-- drivers/md/dm-cache-target.c | 2485 +++++++++++++----------------- 10 files changed, 1930 insertions(+), 2407 deletions(-) create mode 100644 drivers/md/dm-cache-background-tracker.c create mode 100644 drivers/md/dm-cache-background-tracker.h delete mode 100644 drivers/md/dm-cache-policy-cleaner.c (limited to 'drivers') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b7767da50c26..982cd0626bc7 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -325,14 +325,6 @@ config DM_CACHE_SMQ of less memory utilization, improved performance and increased adaptability in the face of changing workloads. -config DM_CACHE_CLEANER - tristate "Cleaner Cache Policy (EXPERIMENTAL)" - depends on DM_CACHE - default y - ---help--- - A simple cache policy that writes back all data to the - origin. Used when decommissioning a dm-cache. - config DM_ERA tristate "Era target (EXPERIMENTAL)" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index d378b1db7852..2801b2fb452d 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -13,9 +13,9 @@ dm-log-userspace-y \ += dm-log-userspace-base.o dm-log-userspace-transfer.o dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o dm-thin-pool-y += dm-thin.o dm-thin-metadata.o -dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o +dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \ + dm-cache-background-tracker.o dm-cache-smq-y += dm-cache-policy-smq.o -dm-cache-cleaner-y += dm-cache-policy-cleaner.o dm-era-y += dm-era-target.o dm-verity-y += dm-verity-target.o md-mod-y += md.o bitmap.o @@ -57,7 +57,6 @@ obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o obj-$(CONFIG_DM_VERITY) += dm-verity.o obj-$(CONFIG_DM_CACHE) += dm-cache.o obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o -obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o obj-$(CONFIG_DM_ERA) += dm-era.o obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c new file mode 100644 index 000000000000..9b1afdfb13f0 --- /dev/null +++ b/drivers/md/dm-cache-background-tracker.c @@ -0,0 +1,238 @@ +/* + * Copyright (C) 2017 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#include "dm-cache-background-tracker.h" + +/*----------------------------------------------------------------*/ + +#define DM_MSG_PREFIX "dm-background-tracker" + +struct bt_work { + struct list_head list; + struct rb_node node; + struct policy_work work; +}; + +struct background_tracker { + unsigned max_work; + atomic_t pending_promotes; + atomic_t pending_writebacks; + atomic_t pending_demotes; + + struct list_head issued; + struct list_head queued; + struct rb_root pending; + + struct kmem_cache *work_cache; +}; + +struct background_tracker *btracker_create(unsigned max_work) +{ + struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL); + + b->max_work = max_work; + atomic_set(&b->pending_promotes, 0); + atomic_set(&b->pending_writebacks, 0); + atomic_set(&b->pending_demotes, 0); + + INIT_LIST_HEAD(&b->issued); + INIT_LIST_HEAD(&b->queued); + + b->pending = RB_ROOT; + b->work_cache = KMEM_CACHE(bt_work, 0); + if (!b->work_cache) { + DMERR("couldn't create mempool for background work items"); + kfree(b); + b = NULL; + } + + return b; +} +EXPORT_SYMBOL_GPL(btracker_create); + +void btracker_destroy(struct background_tracker *b) +{ + kmem_cache_destroy(b->work_cache); + kfree(b); +} +EXPORT_SYMBOL_GPL(btracker_destroy); + +static int cmp_oblock(dm_oblock_t lhs, dm_oblock_t rhs) +{ + if (from_oblock(lhs) < from_oblock(rhs)) + return -1; + + if (from_oblock(rhs) < from_oblock(lhs)) + return 1; + + return 0; +} + +static bool __insert_pending(struct background_tracker *b, + struct bt_work *nw) +{ + int cmp; + struct bt_work *w; + struct rb_node **new = &b->pending.rb_node, *parent = NULL; + + while (*new) { + w = container_of(*new, struct bt_work, node); + + parent = *new; + cmp = cmp_oblock(w->work.oblock, nw->work.oblock); + if (cmp < 0) + new = &((*new)->rb_left); + + else if (cmp > 0) + new = &((*new)->rb_right); + + else + /* already present */ + return false; + } + + rb_link_node(&nw->node, parent, new); + rb_insert_color(&nw->node, &b->pending); + + return true; +} + +static struct bt_work *__find_pending(struct background_tracker *b, + dm_oblock_t oblock) +{ + int cmp; + struct bt_work *w; + struct rb_node **new = &b->pending.rb_node; + + while (*new) { + w = container_of(*new, struct bt_work, node); + + cmp = cmp_oblock(w->work.oblock, oblock); + if (cmp < 0) + new = &((*new)->rb_left); + + else if (cmp > 0) + new = &((*new)->rb_right); + + else + break; + } + + return *new ? w : NULL; +} + + +static void update_stats(struct background_tracker *b, struct policy_work *w, int delta) +{ + switch (w->op) { + case POLICY_PROMOTE: + atomic_add(delta, &b->pending_promotes); + break; + + case POLICY_DEMOTE: + atomic_add(delta, &b->pending_demotes); + break; + + case POLICY_WRITEBACK: + atomic_add(delta, &b->pending_writebacks); + break; + } +} + +unsigned btracker_nr_writebacks_queued(struct background_tracker *b) +{ + return atomic_read(&b->pending_writebacks); +} +EXPORT_SYMBOL_GPL(btracker_nr_writebacks_queued); + +unsigned btracker_nr_demotions_queued(struct background_tracker *b) +{ + return atomic_read(&b->pending_demotes); +} +EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued); + +static bool max_work_reached(struct background_tracker *b) +{ + // FIXME: finish + return false; +} + +int btracker_queue(struct background_tracker *b, + struct policy_work *work, + struct policy_work **pwork) +{ + struct bt_work *w; + + if (pwork) + *pwork = NULL; + + if (max_work_reached(b)) + return -ENOMEM; + + w = kmem_cache_alloc(b->work_cache, GFP_NOWAIT); + if (!w) + return -ENOMEM; + + memcpy(&w->work, work, sizeof(*work)); + + if (!__insert_pending(b, w)) { + /* + * There was a race, we'll just ignore this second + * bit of work for the same oblock. + */ + kmem_cache_free(b->work_cache, w); + return -EINVAL; + } + + if (pwork) { + *pwork = &w->work; + list_add(&w->list, &b->issued); + } else + list_add(&w->list, &b->queued); + update_stats(b, &w->work, 1); + + return 0; +} +EXPORT_SYMBOL_GPL(btracker_queue); + +/* + * Returns -ENODATA if there's no work. + */ +int btracker_issue(struct background_tracker *b, struct policy_work **work) +{ + struct bt_work *w; + + if (list_empty(&b->queued)) + return -ENODATA; + + w = list_first_entry(&b->queued, struct bt_work, list); + list_move(&w->list, &b->issued); + *work = &w->work; + + return 0; +} +EXPORT_SYMBOL_GPL(btracker_issue); + +void btracker_complete(struct background_tracker *b, + struct policy_work *op) +{ + struct bt_work *w = container_of(op, struct bt_work, work); + + update_stats(b, &w->work, -1); + rb_erase(&w->node, &b->pending); + list_del(&w->list); + kmem_cache_free(b->work_cache, w); +} +EXPORT_SYMBOL_GPL(btracker_complete); + +bool btracker_promotion_already_present(struct background_tracker *b, + dm_oblock_t oblock) +{ + return __find_pending(b, oblock) != NULL; +} +EXPORT_SYMBOL_GPL(btracker_promotion_already_present); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-cache-background-tracker.h b/drivers/md/dm-cache-background-tracker.h new file mode 100644 index 000000000000..27ab90dbc275 --- /dev/null +++ b/drivers/md/dm-cache-background-tracker.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2017 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_BACKGROUND_WORK_H +#define DM_CACHE_BACKGROUND_WORK_H + +#include +#include "dm-cache-policy.h" + +/*----------------------------------------------------------------*/ + +struct background_work; +struct background_tracker; + +/* + * FIXME: discuss lack of locking in all methods. + */ +struct background_tracker *btracker_create(unsigned max_work); +void btracker_destroy(struct background_tracker *b); + +unsigned btracker_nr_writebacks_queued(struct background_tracker *b); +unsigned btracker_nr_demotions_queued(struct background_tracker *b); + +/* + * returns -EINVAL iff the work is already queued. -ENOMEM if the work + * couldn't be queued for another reason. + */ +int btracker_queue(struct background_tracker *b, + struct policy_work *work, + struct policy_work **pwork); + +/* + * Returns -ENODATA if there's no work. + */ +int btracker_issue(struct background_tracker *b, struct policy_work **work); +void btracker_complete(struct background_tracker *b, + struct policy_work *op); +bool btracker_promotion_already_present(struct background_tracker *b, + dm_oblock_t oblock); + +/*----------------------------------------------------------------*/ + +#endif diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index 4f07c08cf107..179ed5bf81a3 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -50,6 +50,8 @@ #define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL #define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL +struct dm_cache_metadata; + /* * Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on * failure. If reopening then features must match. diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c deleted file mode 100644 index 2e8a8f1d8358..000000000000 --- a/drivers/md/dm-cache-policy-cleaner.c +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Copyright (C) 2012 Red Hat. All rights reserved. - * - * writeback cache policy supporting flushing out dirty cache blocks. - * - * This file is released under the GPL. - */ - -#include "dm-cache-policy.h" -#include "dm.h" - -#include -#include -#include -#include - -/*----------------------------------------------------------------*/ - -#define DM_MSG_PREFIX "cache cleaner" - -/* Cache entry struct. */ -struct wb_cache_entry { - struct list_head list; - struct hlist_node hlist; - - dm_oblock_t oblock; - dm_cblock_t cblock; - bool dirty:1; - bool pending:1; -}; - -struct hash { - struct hlist_head *table; - dm_block_t hash_bits; - unsigned nr_buckets; -}; - -struct policy { - struct dm_cache_policy policy; - spinlock_t lock; - - struct list_head free; - struct list_head clean; - struct list_head clean_pending; - struct list_head dirty; - - /* - * We know exactly how many cblocks will be needed, - * so we can allocate them up front. - */ - dm_cblock_t cache_size, nr_cblocks_allocated; - struct wb_cache_entry *cblocks; - struct hash chash; -}; - -/*----------------------------------------------------------------------------*/ - -/* - * Low-level functions. - */ -static unsigned next_power(unsigned n, unsigned min) -{ - return roundup_pow_of_two(max(n, min)); -} - -static struct policy *to_policy(struct dm_cache_policy *p) -{ - return container_of(p, struct policy, policy); -} - -static struct list_head *list_pop(struct list_head *q) -{ - struct list_head *r = q->next; - - list_del(r); - - return r; -} - -/*----------------------------------------------------------------------------*/ - -/* Allocate/free various resources. */ -static int alloc_hash(struct hash *hash, unsigned elts) -{ - hash->nr_buckets = next_power(elts >> 4, 16); - hash->hash_bits = __ffs(hash->nr_buckets); - hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets); - - return hash->table ? 0 : -ENOMEM; -} - -static void free_hash(struct hash *hash) -{ - vfree(hash->table); -} - -static int alloc_cache_blocks_with_hash(struct policy *p, dm_cblock_t cache_size) -{ - int r = -ENOMEM; - - p->cblocks = vzalloc(sizeof(*p->cblocks) * from_cblock(cache_size)); - if (p->cblocks) { - unsigned u = from_cblock(cache_size); - - while (u--) - list_add(&p->cblocks[u].list, &p->free); - - p->nr_cblocks_allocated = 0; - - /* Cache entries hash. */ - r = alloc_hash(&p->chash, from_cblock(cache_size)); - if (r) - vfree(p->cblocks); - } - - return r; -} - -static void free_cache_blocks_and_hash(struct policy *p) -{ - free_hash(&p->chash); - vfree(p->cblocks); -} - -static struct wb_cache_entry *alloc_cache_entry(struct policy *p) -{ - struct wb_cache_entry *e; - - BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size)); - - e = list_entry(list_pop(&p->free), struct wb_cache_entry, list); - p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1); - - return e; -} - -/*----------------------------------------------------------------------------*/ - -/* Hash functions (lookup, insert, remove). */ -static struct wb_cache_entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock) -{ - struct hash *hash = &p->chash; - unsigned h = hash_64(from_oblock(oblock), hash->hash_bits); - struct wb_cache_entry *cur; - struct hlist_head *bucket = &hash->table[h]; - - hlist_for_each_entry(cur, bucket, hlist) { - if (cur->oblock == oblock) { - /* Move upfront bucket for faster access. */ - hlist_del(&cur->hlist); - hlist_add_head(&cur->hlist, bucket); - return cur; - } - } - - return NULL; -} - -static void insert_cache_hash_entry(struct policy *p, struct wb_cache_entry *e) -{ - unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits); - - hlist_add_head(&e->hlist, &p->chash.table[h]); -} - -static void remove_cache_hash_entry(struct wb_cache_entry *e) -{ - hlist_del(&e->hlist); -} - -/* Public interface (see dm-cache-policy.h */ -static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock, - bool can_block, bool can_migrate, bool discarded_oblock, - struct bio *bio, struct policy_locker *locker, - struct policy_result *result) -{ - struct policy *p = to_policy(pe); - struct wb_cache_entry *e; - unsigned long flags; - - result->op = POLICY_MISS; - - if (can_block) - spin_lock_irqsave(&p->lock, flags); - - else if (!spin_trylock_irqsave(&p->lock, flags)) - return -EWOULDBLOCK; - - e = lookup_cache_entry(p, oblock); - if (e) { - result->op = POLICY_HIT; - result->cblock = e->cblock; - - } - - spin_unlock_irqrestore(&p->lock, flags); - - return 0; -} - -static int wb_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock) -{ - int r; - struct policy *p = to_policy(pe); - struct wb_cache_entry *e; - unsigned long flags; - - if (!spin_trylock_irqsave(&p->lock, flags)) - return -EWOULDBLOCK; - - e = lookup_cache_entry(p, oblock); - if (e) { - *cblock = e->cblock; - r = 0; - - } else - r = -ENOENT; - - spin_unlock_irqrestore(&p->lock, flags); - - return r; -} - -static void __set_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock, bool set) -{ - struct policy *p = to_policy(pe); - struct wb_cache_entry *e; - - e = lookup_cache_entry(p, oblock); - BUG_ON(!e); - - if (set) { - if (!e->dirty) { - e->dirty = true; - list_move(&e->list, &p->dirty); - } - - } else { - if (e->dirty) { - e->pending = false; - e->dirty = false; - list_move(&e->list, &p->clean); - } - } -} - -static void wb_set_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock) -{ - struct policy *p = to_policy(pe); - unsigned long flags; - - spin_lock_irqsave(&p->lock, flags); - __set_clear_dirty(pe, oblock, true); - spin_unlock_irqrestore(&p->lock, flags); -} - -static void wb_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock) -{ - struct policy *p = to_policy(pe); - unsigned long flags; - - spin_lock_irqsave(&p->lock, flags); - __set_clear_dirty(pe, oblock, false); - spin_unlock_irqrestore(&p->lock, flags); -} - -static void add_cache_entry(struct policy *p, struct wb_cache_entry *e) -{ - insert_cache_hash_entry(p, e); - if (e->dirty) - lis