summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-snap.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-snap.c')
-rw-r--r--drivers/md/dm-snap.c1279
1 files changed, 1017 insertions, 262 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 3a3ba46e6d4b..ee8eb283650d 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -25,6 +25,11 @@
#define DM_MSG_PREFIX "snapshots"
+static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
+
+#define dm_target_is_snapshot_merge(ti) \
+ ((ti)->type->name == dm_snapshot_merge_target_name)
+
/*
* The percentage increment we will wake up users at
*/
@@ -49,7 +54,7 @@
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
-struct exception_table {
+struct dm_exception_table {
uint32_t hash_mask;
unsigned hash_shift;
struct list_head *table;
@@ -59,22 +64,31 @@ struct dm_snapshot {
struct rw_semaphore lock;
struct dm_dev *origin;
+ struct dm_dev *cow;
+
+ struct dm_target *ti;
/* List of snapshots per Origin */
struct list_head list;
- /* You can't use a snapshot if this is 0 (e.g. if full) */
+ /*
+ * You can't use a snapshot if this is 0 (e.g. if full).
+ * A snapshot-merge target never clears this.
+ */
int valid;
/* Origin writes don't trigger exceptions until this is set */
int active;
+ /* Whether or not owning mapped_device is suspended */
+ int suspended;
+
mempool_t *pending_pool;
atomic_t pending_exceptions_count;
- struct exception_table pending;
- struct exception_table complete;
+ struct dm_exception_table pending;
+ struct dm_exception_table complete;
/*
* pe_lock protects all pending_exception operations and access
@@ -95,8 +109,51 @@ struct dm_snapshot {
mempool_t *tracked_chunk_pool;
spinlock_t tracked_chunk_lock;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+
+ /*
+ * The merge operation failed if this flag is set.
+ * Failure modes are handled as follows:
+ * - I/O error reading the header
+ * => don't load the target; abort.
+ * - Header does not have "valid" flag set
+ * => use the origin; forget about the snapshot.
+ * - I/O error when reading exceptions
+ * => don't load the target; abort.
+ * (We can't use the intermediate origin state.)
+ * - I/O error while merging
+ * => stop merging; set merge_failed; process I/O normally.
+ */
+ int merge_failed;
+
+ /* Wait for events based on state_bits */
+ unsigned long state_bits;
+
+ /* Range of chunks currently being merged. */
+ chunk_t first_merging_chunk;
+ int num_merging_chunks;
+
+ /*
+ * Incoming bios that overlap with chunks being merged must wait
+ * for them to be committed.
+ */
+ struct bio_list bios_queued_during_merge;
};
+/*
+ * state_bits:
+ * RUNNING_MERGE - Merge operation is in progress.
+ * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped;
+ * cleared afterwards.
+ */
+#define RUNNING_MERGE 0
+#define SHUTDOWN_MERGE 1
+
+struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
+{
+ return s->cow;
+}
+EXPORT_SYMBOL(dm_snap_cow);
+
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
@@ -116,7 +173,7 @@ static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
}
struct dm_snap_pending_exception {
- struct dm_snap_exception e;
+ struct dm_exception e;
/*
* Origin buffers waiting for this to complete are held
@@ -125,28 +182,6 @@ struct dm_snap_pending_exception {
struct bio_list origin_bios;
struct bio_list snapshot_bios;
- /*
- * Short-term queue of pending exceptions prior to submission.
- */
- struct list_head list;
-
- /*
- * The primary pending_exception is the one that holds
- * the ref_count and the list of origin_bios for a
- * group of pending_exceptions. It is always last to get freed.
- * These fields get set up when writing to the origin.
- */
- struct dm_snap_pending_exception *primary_pe;
-
- /*
- * Number of pending_exceptions processing this chunk.
- * When this drops to zero we must complete the origin bios.
- * If incrementing or decrementing this, hold pe->snap->lock for
- * the sibling concerned and not pe->primary_pe->snap->lock unless
- * they are the same.
- */
- atomic_t ref_count;
-
/* Pointer back to snapshot context */
struct dm_snapshot *snap;
@@ -222,6 +257,16 @@ static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
}
/*
+ * This conflicting I/O is extremely improbable in the caller,
+ * so msleep(1) is sufficient and there is no need for a wait queue.
+ */
+static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
+{
+ while (__chunk_is_tracked(s, chunk))
+ msleep(1);
+}
+
+/*
* One of these per registered origin, held in the snapshot_origins hash
*/
struct origin {
@@ -243,6 +288,10 @@ struct origin {
static struct list_head *_origins;
static struct rw_semaphore _origins_lock;
+static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
+static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
+static uint64_t _pending_exceptions_done_count;
+
static int init_origin_hash(void)
{
int i;
@@ -291,22 +340,144 @@ static void __insert_origin(struct origin *o)
}
/*
+ * _origins_lock must be held when calling this function.
+ * Returns number of snapshots registered using the supplied cow device, plus:
+ * snap_src - a snapshot suitable for use as a source of exception handover
+ * snap_dest - a snapshot capable of receiving exception handover.
+ * snap_merge - an existing snapshot-merge target linked to the same origin.
+ * There can be at most one snapshot-merge target. The parameter is optional.
+ *
+ * Possible return values and states of snap_src and snap_dest.
+ * 0: NULL, NULL - first new snapshot
+ * 1: snap_src, NULL - normal snapshot
+ * 2: snap_src, snap_dest - waiting for handover
+ * 2: snap_src, NULL - handed over, waiting for old to be deleted
+ * 1: NULL, snap_dest - source got destroyed without handover
+ */
+static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
+ struct dm_snapshot **snap_src,
+ struct dm_snapshot **snap_dest,
+ struct dm_snapshot **snap_merge)
+{
+ struct dm_snapshot *s;
+ struct origin *o;
+ int count = 0;
+ int active;
+
+ o = __lookup_origin(snap->origin->bdev);
+ if (!o)
+ goto out;
+
+ list_for_each_entry(s, &o->snapshots, list) {
+ if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
+ *snap_merge = s;
+ if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
+ continue;
+
+ down_read(&s->lock);
+ active = s->active;
+ up_read(&s->lock);
+
+ if (active) {
+ if (snap_src)
+ *snap_src = s;
+ } else if (snap_dest)
+ *snap_dest = s;
+
+ count++;
+ }
+
+out:
+ return count;
+}
+
+/*
+ * On success, returns 1 if this snapshot is a handover destination,
+ * otherwise returns 0.
+ */
+static int __validate_exception_handover(struct dm_snapshot *snap)
+{
+ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+ struct dm_snapshot *snap_merge = NULL;
+
+ /* Does snapshot need exceptions handed over to it? */
+ if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest,
+ &snap_merge) == 2) ||
+ snap_dest) {
+ snap->ti->error = "Snapshot cow pairing for exception "
+ "table handover failed";
+ return -EINVAL;
+ }
+
+ /*
+ * If no snap_src was found, snap cannot become a handover
+ * destination.
+ */
+ if (!snap_src)
+ return 0;
+
+ /*
+ * Non-snapshot-merge handover?
+ */
+ if (!dm_target_is_snapshot_merge(snap->ti))
+ return 1;
+
+ /*
+ * Do not allow more than one merging snapshot.
+ */
+ if (snap_merge) {
+ snap->ti->error = "A snapshot is already merging.";
+ return -EINVAL;
+ }
+
+ if (!snap_src->store->type->prepare_merge ||
+ !snap_src->store->type->commit_merge) {
+ snap->ti->error = "Snapshot exception store does not "
+ "support snapshot-merge.";
+ return -EINVAL;
+ }
+
+ return 1;
+}
+
+static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
+{
+ struct dm_snapshot *l;
+
+ /* Sort the list according to chunk size, largest-first smallest-last */
+ list_for_each_entry(l, &o->snapshots, list)
+ if (l->store->chunk_size < s->store->chunk_size)
+ break;
+ list_add_tail(&s->list, &l->list);
+}
+
+/*
* Make a note of the snapshot and its origin so we can look it
* up when the origin has a write on it.
+ *
+ * Also validate snapshot exception store handovers.
+ * On success, returns 1 if this registration is a handover destination,
+ * otherwise returns 0.
*/
static int register_snapshot(struct dm_snapshot *snap)
{
- struct dm_snapshot *l;
- struct origin *o, *new_o;
+ struct origin *o, *new_o = NULL;
struct block_device *bdev = snap->origin->bdev;
+ int r = 0;
new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
if (!new_o)
return -ENOMEM;
down_write(&_origins_lock);
- o = __lookup_origin(bdev);
+ r = __validate_exception_handover(snap);
+ if (r < 0) {
+ kfree(new_o);
+ goto out;
+ }
+
+ o = __lookup_origin(bdev);
if (o)
kfree(new_o);
else {
@@ -320,14 +491,27 @@ static int register_snapshot(struct dm_snapshot *snap)
__insert_origin(o);
}
- /* Sort the list according to chunk size, largest-first smallest-last */
- list_for_each_entry(l, &o->snapshots, list)
- if (l->store->chunk_size < snap->store->chunk_size)
- break;
- list_add_tail(&snap->list, &l->list);
+ __insert_snapshot(o, snap);
+
+out:
+ up_write(&_origins_lock);
+
+ return r;
+}
+
+/*
+ * Move snapshot to correct place in list according to chunk size.
+ */
+static void reregister_snapshot(struct dm_snapshot *s)
+{
+ struct block_device *bdev = s->origin->bdev;
+
+ down_write(&_origins_lock);
+
+ list_del(&s->list);
+ __insert_snapshot(__lookup_origin(bdev), s);
up_write(&_origins_lock);
- return 0;
}
static void unregister_snapshot(struct dm_snapshot *s)
@@ -338,7 +522,7 @@ static void unregister_snapshot(struct dm_snapshot *s)
o = __lookup_origin(s->origin->bdev);
list_del(&s->list);
- if (list_empty(&o->snapshots)) {
+ if (o && list_empty(&o->snapshots)) {
list_del(&o->hash_list);
kfree(o);
}
@@ -351,8 +535,8 @@ static void unregister_snapshot(struct dm_snapshot *s)
* The lowest hash_shift bits of the chunk number are ignored, allowing
* some consecutive chunks to be grouped together.
*/
-static int init_exception_table(struct exception_table *et, uint32_t size,
- unsigned hash_shift)
+static int dm_exception_table_init(struct dm_exception_table *et,
+ uint32_t size, unsigned hash_shift)
{
unsigned int i;
@@ -368,10 +552,11 @@ static int init_exception_table(struct exception_table *et, uint32_t size,
return 0;
}
-static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem)
+static void dm_exception_table_exit(struct dm_exception_table *et,
+ struct kmem_cache *mem)
{
struct list_head *slot;
- struct dm_snap_exception *ex, *next;
+ struct dm_exception *ex, *next;
int i, size;
size = et->hash_mask + 1;
@@ -385,19 +570,12 @@ static void exit_exception_table(struct exception_table *et, struct kmem_cache *
vfree(et->table);
}
-static uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
+static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
{
return (chunk >> et->hash_shift) & et->hash_mask;
}
-static void insert_exception(struct exception_table *eh,
- struct dm_snap_exception *e)
-{
- struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
- list_add(&e->hash_list, l);
-}
-
-static void remove_exception(struct dm_snap_exception *e)
+static void dm_remove_exception(struct dm_exception *e)
{
list_del(&e->hash_list);
}
@@ -406,11 +584,11 @@ static void remove_exception(struct dm_snap_exception *e)
* Return the exception data for a sector, or NULL if not
* remapped.
*/
-static struct dm_snap_exception *lookup_exception(struct exception_table *et,
- chunk_t chunk)
+static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
+ chunk_t chunk)
{
struct list_head *slot;
- struct dm_snap_exception *e;
+ struct dm_exception *e;
slot = &et->table[exception_hash(et, chunk)];
list_for_each_entry (e, slot, hash_list)
@@ -421,9 +599,9 @@ static struct dm_snap_exception *lookup_exception(struct exception_table *et,
return NULL;
}
-static struct dm_snap_exception *alloc_exception(void)
+static struct dm_exception *alloc_completed_exception(void)
{
- struct dm_snap_exception *e;
+ struct dm_exception *e;
e = kmem_cache_alloc(exception_cache, GFP_NOIO);
if (!e)
@@ -432,7 +610,7 @@ static struct dm_snap_exception *alloc_exception(void)
return e;
}
-static void free_exception(struct dm_snap_exception *e)
+static void free_completed_exception(struct dm_exception *e)
{
kmem_cache_free(exception_cache, e);
}
@@ -457,12 +635,11 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe)
atomic_dec(&s->pending_exceptions_count);
}
-static void insert_completed_exception(struct dm_snapshot *s,
- struct dm_snap_exception *new_e)
+static void dm_insert_exception(struct dm_exception_table *eh,
+ struct dm_exception *new_e)
{
- struct exception_table *eh = &s->complete;
struct list_head *l;
- struct dm_snap_exception *e = NULL;
+ struct dm_exception *e = NULL;
l = &eh->table[exception_hash(eh, new_e->old_chunk)];
@@ -478,7 +655,7 @@ static void insert_completed_exception(struct dm_snapshot *s,
new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
dm_consecutive_chunk_count(e) + 1)) {
dm_consecutive_chunk_count_inc(e);
- free_exception(new_e);
+ free_completed_exception(new_e);
return;
}
@@ -488,7 +665,7 @@ static void insert_completed_exception(struct dm_snapshot *s,
dm_consecutive_chunk_count_inc(e);
e->old_chunk--;
e->new_chunk--;
- free_exception(new_e);
+ free_completed_exception(new_e);
return;
}
@@ -507,9 +684,9 @@ out:
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
{
struct dm_snapshot *s = context;
- struct dm_snap_exception *e;
+ struct dm_exception *e;
- e = alloc_exception();
+ e = alloc_completed_exception();
if (!e)
return -ENOMEM;
@@ -518,11 +695,30 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
/* Consecutive_count is implicitly initialised to zero */
e->new_chunk = new;
- insert_completed_exception(s, e);
+ dm_insert_exception(&s->complete, e);
return 0;
}
+#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
+
+/*
+ * Return a minimum chunk size of all snapshots that have the specified origin.
+ * Return zero if the origin has no snapshots.
+ */
+static sector_t __minimum_chunk_size(struct origin *o)
+{
+ struct dm_snapshot *snap;
+ unsigned chunk_size = 0;
+
+ if (o)
+ list_for_each_entry(snap, &o->snapshots, list)
+ chunk_size = min_not_zero(chunk_size,
+ snap->store->chunk_size);
+
+ return chunk_size;
+}
+
/*
* Hard coded magic.
*/
@@ -546,16 +742,18 @@ static int init_hash_tables(struct dm_snapshot *s)
* Calculate based on the size of the original volume or
* the COW volume...
*/
- cow_dev_size = get_dev_size(s->store->cow->bdev);
+ cow_dev_size = get_dev_size(s->cow->bdev);
origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
hash_size = min(hash_size, max_buckets);
+ if (hash_size < 64)
+ hash_size = 64;
hash_size = rounddown_pow_of_two(hash_size);
- if (init_exception_table(&s->complete, hash_size,
- DM_CHUNK_CONSECUTIVE_BITS))
+ if (dm_exception_table_init(&s->complete, hash_size,
+ DM_CHUNK_CONSECUTIVE_BITS))
return -ENOMEM;
/*
@@ -566,14 +764,284 @@ static int init_hash_tables(struct dm_snapshot *s)
if (hash_size < 64)
hash_size = 64;
- if (init_exception_table(&s->pending, hash_size, 0)) {
- exit_exception_table(&s->complete, exception_cache);
+ if (dm_exception_table_init(&s->pending, hash_size, 0)) {
+ dm_exception_table_exit(&s->complete, exception_cache);
return -ENOMEM;
}
return 0;
}
+static void merge_shutdown(struct dm_snapshot *s)
+{
+ clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&s->state_bits, RUNNING_MERGE);
+}
+
+static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s)
+{
+ s->first_merging_chunk = 0;
+ s->num_merging_chunks = 0;
+
+ return bio_list_get(&s->bios_queued_during_merge);
+}
+
+/*
+ * Remove one chunk from the index of completed exceptions.
+ */
+static int __remove_single_exception_chunk(struct dm_snapshot *s,
+ chunk_t old_chunk)
+{
+ struct dm_exception *e;
+
+ e = dm_lookup_exception(&s->complete, old_chunk);
+ if (!e) {
+ DMERR("Corruption detected: exception for block %llu is "
+ "on disk but not in memory",
+ (unsigned long long)old_chunk);
+ return -EINVAL;
+ }
+
+ /*
+ * If this is the only chunk using this exception, remove exception.
+ */
+ if (!dm_consecutive_chunk_count(e)) {
+ dm_remove_exception(e);
+ free_completed_exception(e);
+ return 0;
+ }
+
+ /*
+ * The chunk may be either at the beginning or the end of a
+ * group of consecutive chunks - never in the middle. We are
+ * removing chunks in the opposite order to that in which they
+ * were added, so this should always be true.
+ * Decrement the consecutive chunk counter and adjust the
+ * starting point if necessary.
+ */
+ if (old_chunk == e->old_chunk) {
+ e->old_chunk++;
+ e->new_chunk++;
+ } else if (old_chunk != e->old_chunk +
+ dm_consecutive_chunk_count(e)) {
+ DMERR("Attempt to merge block %llu from the "
+ "middle of a chunk range [%llu - %llu]",
+ (unsigned long long)old_chunk,
+ (unsigned long long)e->old_chunk,
+ (unsigned long long)
+ e->old_chunk + dm_consecutive_chunk_count(e));
+ return -EINVAL;
+ }
+
+ dm_consecutive_chunk_count_dec(e);
+
+ return 0;
+}
+
+static void flush_bios(struct bio *bio);
+
+static int remove_single_exception_chunk(struct dm_snapshot *s)
+{
+ struct bio *b = NULL;
+ int r;
+ chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
+
+ down_write(&s->lock);
+
+ /*
+ * Process chunks (and associated exceptions) in reverse order
+ * so that dm_consecutive_chunk_count_dec() accounting works.
+ */
+ do {
+ r = __remove_single_exception_chunk(s, old_chunk);
+ if (r)
+ goto out;
+ } while (old_chunk-- > s->first_merging_chunk);
+
+ b = __release_queued_bios_after_merge(s);
+
+out:
+ up_write(&s->lock);
+ if (b)
+ flush_bios(b);
+
+ return r;
+}
+
+static int origin_write_extent(struct dm_snapshot *merging_snap,
+ sector_t sector, unsigned chunk_size);
+
+static void merge_callback(int read_err, unsigned long write_err,
+ void *context);
+
+static uint64_t read_pending_exceptions_done_count(void)
+{
+ uint64_t pending_exceptions_done;
+
+ spin_lock(&_pending_exceptions_done_spinlock);
+ pending_exceptions_done = _pending_exceptions_done_count;
+ spin_unlock(&_pending_exceptions_done_spinlock);
+
+ return pending_exceptions_done;
+}
+
+static void increment_pending_exceptions_done_count(void)
+{
+ spin_lock(&_pending_exceptions_done_spinlock);
+ _pending_exceptions_done_count++;
+ spin_unlock(&_pending_exceptions_done_spinlock);
+
+ wake_up_all(&_pending_exceptions_done);
+}
+
+static void snapshot_merge_next_chunks(struct dm_snapshot *s)
+{
+ int i, linear_chunks;
+ chunk_t old_chunk, new_chunk;
+ struct dm_io_region src, dest;
+ sector_t io_size;
+ uint64_t previous_count;
+
+ BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
+ if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
+ goto shut;
+
+ /*
+ * valid flag never changes during merge, so no lock required.
+ */
+ if (!s->valid) {
+ DMERR("Snapshot is invalid: can't merge");
+ goto shut;
+ }
+
+ linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
+ &new_chunk);
+ if (linear_chunks <= 0) {
+ if (linear_chunks < 0) {
+ DMERR("Read error in exception store: "
+ "shutting down merge");
+ down_write(&s->lock);
+ s->merge_failed = 1;
+ up_write(&s->lock);
+ }
+ goto shut;
+ }
+
+ /* Adjust old_chunk and new_chunk to reflect start of linear region */
+ old_chunk = old_chunk + 1 - linear_chunks;
+ new_chunk = new_chunk + 1 - linear_chunks;
+
+ /*
+ * Use one (potentially large) I/O to copy all 'linear_chunks'
+ * from the exception store to the origin
+ */
+ io_size = linear_chunks * s->store->chunk_size;
+
+ dest.bdev = s->origin->bdev;
+ dest.sector = chunk_to_sector(s->store, old_chunk);
+ dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
+
+ src.bdev = s->cow->bdev;
+ src.sector = chunk_to_sector(s->store, new_chunk);
+ src.count = dest.count;
+
+ /*
+ * Reallocate any exceptions needed in other snapshots then
+ * wait for the pending exceptions to complete.
+ * Each time any pending exception (globally on the system)
+ * completes we are woken and repeat the process to find out
+ * if we can proceed. While this may not seem a particularly
+ * efficient algorithm, it is not expected to have any
+ * significant impact on performance.
+ */
+ previous_count = read_pending_exceptions_done_count();
+ while (origin_write_extent(s, dest.sector, io_size)) {
+ wait_event(_pending_exceptions_done,
+ (read_pending_exceptions_done_count() !=
+ previous_count));
+ /* Retry after the wait, until all exceptions are done. */
+ previous_count = read_pending_exceptions_done_count();
+ }
+
+ down_write(&s->lock);
+ s->first_merging_chunk = old_chunk;
+ s->num_merging_chunks = linear_chunks;
+ up_write(&s->lock);
+
+ /* Wait until writes to all 'linear_chunks' drain */
+ for (i = 0; i < linear_chunks; i++)
+ __check_for_conflicting_io(s, old_chunk + i);
+
+ dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
+ return;
+
+shut:
+ merge_shutdown(s);
+}
+
+static void error_bios(struct bio *bio);
+
+static void merge_callback(int read_err, unsigned long write_err, void *context)
+{
+ struct dm_snapshot *s = context;
+ struct bio *b = NULL;
+
+ if (read_err || write_err) {
+ if (read_err)
+ DMERR("Read error: shutting down merge.");
+ else
+ DMERR("Write error: shutting down merge.");
+ goto shut;
+ }
+
+ if (s->store->type->commit_merge(s->store,
+ s->num_merging_chunks) < 0) {
+ DMERR("Write error in exception store: shutting down merge");
+ goto shut;
+ }
+
+ if (remove_single_exception_chunk(s) < 0)
+ goto shut;
+
+ snapshot_merge_next_chunks(s);
+
+ return;
+
+shut:
+ down_write(&s->lock);
+ s->merge_failed = 1;
+ b = __release_queued_bios_after_merge(s);
+ up_write(&s->lock);
+ error_bios(b);
+
+ merge_shutdown(s);
+}
+
+static void start_merge(struct dm_snapshot *s)
+{
+ if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits))
+ snapshot_merge_next_chunks(s);
+}
+
+static int wait_schedule(void *ptr)
+{
+ schedule();
+
+ return 0;
+}
+
+/*
+ * Stop the merging process and wait until it finishes.
+ */
+static void stop_merge(struct dm_snapshot *s)
+{
+ set_bit(SHUTDOWN_MERGE, &s->state_bits);
+ wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule,
+ TASK_UNINTERRUPTIBLE);
+ clear_bit(SHUTDOWN_MERGE, &s->state_bits);
+}
+
/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/
@@ -582,50 +1050,73 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
struct dm_snapshot *s;
int i;
int r = -EINVAL;
- char *origin_path;
- struct dm_exception_store *store;
- unsigned args_used;
+ char *origin_path, *cow_path;
+ unsigned args_used, num_flush_requests = 1;
+ fmode_t origin_mode = FMODE_READ;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
r = -EINVAL;
- goto bad_args;
+ goto bad;
+ }
+
+ if (dm_target_is_snapshot_merge(ti)) {
+ num_flush_requests = 2;
+ origin_mode = FMODE_WRITE;
}
origin_path = argv[0];
argv++;
argc--;
- r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s) {
+ ti->error = "Cannot allocate snapshot context private "
+ "structure";
+ r = -ENOMEM;
+ goto bad;
+ }
+
+ cow_path = argv[0];
+ argv++;
+ argc--;
+
+ r = dm_get_device(ti, cow_path, 0, 0,
+ FMODE_READ | FMODE_WRITE, &s->cow);
+ if (r) {
+ ti->error = "Cannot get COW device";
+ goto bad_cow;
+ }
+
+ r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
if (r) {
ti->error = "Couldn't create exception store";
r = -EINVAL;
- goto bad_args;
+ goto bad_store;
}
argv += args_used;
argc -= args_used;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s) {
- ti->error = "Cannot allocate snapshot context private "
- "structure";
- r = -ENOMEM;
- goto bad_snap;
- }
-
- r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
+ r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
goto bad_origin;
}
- s->store = store;
+ s->ti = ti;
s->valid = 1;
s->active = 0;
+ s->suspended = 0;
atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock);
+ INIT_LIST_HEAD(&s->list);
spin_lock_init(&s->pe_lock);
+ s->state_bits = 0;
+ s->merge_failed = 0;
+ s->first_merging_chunk = 0;
+ s->num_merging_chunks = 0;
+ bio_list_init(&s->bios_queued_during_merge);
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@@ -659,39 +1150,55 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->tracked_chunk_lock);
- /* Metadata must only be loaded into one table at once */
+ bio_list_init(&s->queued_bios);
+ INIT_WORK(&s->queued_bios_work, flush_queued_bios);
+
+ ti->private = s;
+ ti->num_flush_requests = num_flush_requests;
+
+ /* Add snapshot to the list of snapshots for this origin */
+ /* Exceptions aren't triggered till snapshot_resume() is called */
+ r = register_snapshot(s);
+ if (r == -ENOMEM) {
+ ti->error = "Snapshot origin struct allocation failed";
+ goto bad_load_and_register;
+ } else if (r < 0) {
+ /* invalid handover, register_snapshot has set ti->error */
+ goto bad_load_and_register;
+ }
+
+ /*
+ * Metadata must only be loaded into one table at once, so skip this
+ * if metadata will be handed over during resume.
+ * Chunk size will be set during the handover - set it to zero to
+ * ensure it's ignored.
+ */
+ if (r > 0) {
+ s->store->chunk_size = 0;
+ return 0;
+ }
+
r = s->store->type->read_metadata(s->store, dm_add_exception,
(void *)s);
if (r < 0) {
ti->error = "Failed to read snapshot metadata";
- goto bad_load_and_register;
+ goto bad_read_metadata;
} else if (r > 0) {
s->valid = 0;
DMWARN("Snapshot is marked invalid.");
}
- bio_list_init(&s->queued_bios);
- INIT_WORK(&s->queued_bios_work, flush_queued_bios);
-
if (!s->store->chunk_size) {
ti->error = "Chunk size not set";
- goto bad_load_and_register;
- }
-
- /* Add snapshot to the list of snapshots for this origin */
- /* Exceptions aren't triggered till snapshot_resume() is called */
- if (register_snapshot(s)) {
- r = -EINVAL;
- ti->error = "Cannot register snapshot origin";
- goto bad_load_and_register;
+ goto bad_read_metadata;
}
-
- ti->private = s;
ti->split_io = s->store->chunk_size;
- ti->num_flush_requests = 1;
return 0;
+bad_read_metadata:
+ unregister_snapshot(s);
+
bad_load_and_register:
mempool_destroy(s->tracked_chunk_pool);
@@ -702,19 +1209,22 @@ bad_pending_pool:
dm_kcopyd_client_destroy(s->kcopyd_client);
bad_kcopyd:
- exit_exception_table(&s->pending, pending_cache);
- exit_exception_table(&s->complete, exception_cache);
+ dm_exception_table_exit(&s->pending, pending_cache);
+ dm_exception_table_exit(&s->complete, exception_cache);
bad_hash_tables:
dm_put_device(ti, s->origin);
bad_origin:
- kfree(s);
+ dm_exception_store_destroy(s->store);
-bad_snap:
- dm_exception_store_destroy(store);
+bad_store:
+ dm_put_device(ti, s->cow);
+
+bad_cow:
+ kfree(s);
-bad_args:
+bad:
return r;
}
@@ -723,8 +1233,39 @@ static void __free_exceptions(struct dm_snapshot *s)
dm_kcopyd_client_destroy(s->kcopyd_client);
s->kcopyd_client = NULL;
- exit_exception_table(&s->pending, pending_cache);
- exit_exception_table(&s->complete, exception_cache);
+ dm_exception_table_exit(&s->pending, pending_cache);
+ dm_exception_table_exit(&s->complete, exception_cache);
+}
+
+static void __handover_exceptions(struct dm_snapshot *snap_src,
+ struct dm_snapshot *snap_dest)
+{
+ union {
+ struct dm_exception_table table_swap;
+ struct dm_exception_store *store_swap;
+ } u;
+
+ /*
+ * Swap all snapshot context information between the two instances.
+ */
+ u.table_swap = snap_dest->complete;
+ snap_dest->complete = snap_src->complete;
+ snap_src->complete = u.table_swap;
+
+ u.store_swap = snap_dest->store;
+ snap_dest->store = snap_src->store;
+ snap_src->store = u.store_swap;
+
+ snap_dest->store->snap = snap_dest;
+ snap_src->store->snap = snap_src;
+
+ snap_dest->ti->split_io = snap_dest->store->chunk_size;
+ snap_dest->valid = snap_src->valid;
+
+ /*
+ * Set source invalid to ensure it receives no further I/O.
+ */
+ snap_src->valid = 0;
}
static void snapshot_dtr(struct dm_target *ti)
@@ -733,9 +1274,24 @@ static void snapshot_dtr(struct dm_target *ti)
int i;
#endif
struct dm_snapshot *s = ti->private;
+ struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
flush_workqueue(ksnapd);
+ down_read(&_origins_lock);
+ /* Check whether exception handover must be cancelled */
+ (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
+ if (snap_src && snap_dest && (s == snap_src)) {
+ down_write(&snap_dest->lock);
+ snap_dest->valid = 0;
+ up_write(&snap_dest->lock);
+ DMERR("Cancelling snapshot handover.");
+ }
+ up_read(&_origins_lock);
+
+ if (dm_target_is_snapshot_merge(ti))
+ stop_merge(s);
+
/* Prevent further origin writes from using this snapshot. */
/* After this returns there can be no new kcopyd jobs. */
unregister_snapshot(s);
@@ -763,6 +1319,8 @@ static void snapshot_dtr(struct dm_target *ti)
dm_exception_store_destroy(s->store);
+ dm_put_device(ti, s->cow);
+
kfree(s);
}
@@ -795,6 +1353,26 @@ static void flush_queued_bios(struct work_struct *work)
flush_bios(queued_bios);
}
+static int do_origin(struct dm_dev *origin, struct bio *bio);
+
+/*
+ * Flush a list of buffers.
+ */
+static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
+{
+ struct bio *n;
+ int r;
+
+ while (bio) {
+ n = bio->bi_next;
+ bio->bi_next = NULL;
+ r = do_origin(s->origin, bio);
+ if (r == DM_MAPIO_REMAPPED)
+ generic_make_request(bio);
+ bio = n;
+ }
+}
+
/*
* Error a list of buffers.
*/
@@ -825,45 +1403,12 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
s->valid = 0;
- dm_table_event(s->store->ti->table);
-}
-
-static void get_pending_exception(struct dm_snap_pending_exception *pe)
-{
- atomic_inc(&pe->ref_count);
-}
-
-static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
-{
- struct dm_snap_pending_exception *primary_pe;
- struct bio *origin_bios = NULL;
-
- primary_pe = pe->primary_pe;
-
- /*
- * If this pe is involved in a write to the origin and
- * it is the last sibling to complete then release
- * the bios for the original write to the origin.
- */
- if (primary_pe &&
- atomic_dec_and_test(&primary_pe->ref_count)) {
- origin_bios = bio_list_get(&primary_pe->origin_bios);
- free_pending_exception(primary_pe);
- }
-
- /*
- * Free the pe if it's not linked to an origin write or if
- * it's not itself a primary pe.
- */
- if (!primary_pe || primary_pe != pe)
- free_pending_exception(pe);
-
- return origin_bios;
+ dm_table_event(s->ti->table);
}
static void pending_complete(struct dm_snap_pending_exception *pe, int success)
{
- struct dm_snap_exception *e;
+ struct dm_exception *e;
struct dm_snapshot *s = pe->snap;
struct bio *origin_bios = NULL;
struct bio *snapshot_bios = NULL;
@@ -877,7 +1422,7 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
goto out;
}
- e = alloc_exception();
+ e = alloc_completed_exception();
if (!e) {
down_write(&s->lock);
__invalidate_snapshot(s, -ENOMEM);
@@ -888,28 +1433,27 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
down_write(&s->lock);
if (!s->valid) {
- free_exception(e);
+ free_completed_exception(e);
error = 1