summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-04-05 18:49:31 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-04-05 18:49:31 -0700
commit 04535d273ee3edacd9551b2512b4e939ba20277f (patch)
tree 262f3df914bfea16b43226fa60c2f43345ee0146 /drivers/md
parent 3f583bc21977a608908b83d03ee2250426a5695c (diff)
parent 0596661f0a16d9d69bf1033320e70b6ff52b5e81 (diff)
Merge tag 'dm-3.15-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper changes from Mike Snitzer:

 - Fix dm-cache corruption caused by discard_block_size > cache_block_size

 - Fix a lock-inversion detected by LOCKDEP in dm-cache

 - Fix a dangling bio bug in the dm-thinp target's process_deferred_bios
   error path

 - Fix corruption due to non-atomic transaction commit which allowed a
   metadata superblock to be written before all other metadata was
   successfully written -- this is common to all targets that use the
   persistent-data library's transaction manager (dm-thinp, dm-cache and
   dm-era).

 - Various small cleanups in the DM core

 - Add the dm-era target which is useful for keeping track of which
   blocks were written within a user defined period of time called an
   'era'. Use cases include tracking changed blocks for backup software,
   and partially invalidating the contents of a cache to restore cache
   coherency after rolling back a vendor snapshot.

 - Improve the on-disk layout of multithreaded writes to the
   dm-thin-pool by splitting the pool's deferred bio list to be a per-thin
   device list and then sorting that list using an rb_tree. The subsequent
   read throughput of the data written via multiple threads improved by
   ~70%.

 - Simplify the multipath target's handling of queuing IO by pushing
   requests back to the request queue rather than queueing the IO
   internally.
* tag 'dm-3.15-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (24 commits)
  dm cache: fix a lock-inversion
  dm thin: sort the per thin deferred bios using an rb_tree
  dm thin: use per thin device deferred bio lists
  dm thin: simplify pool_is_congested
  dm thin: fix dangling bio in process_deferred_bios error path
  dm mpath: print more useful warnings in multipath_message()
  dm-mpath: do not activate failed paths
  dm mpath: remove extra nesting in map function
  dm mpath: remove map_io()
  dm mpath: reduce memory pressure when requeuing
  dm mpath: remove process_queued_ios()
  dm mpath: push back requests instead of queueing
  dm table: add dm_table_run_md_queue_async
  dm mpath: do not call pg_init when it is already running
  dm: use RCU_INIT_POINTER instead of rcu_assign_pointer in __unbind
  dm: stop using bi_private
  dm: remove dm_get_mapinfo
  dm: make dm_table_alloc_md_mempools static
  dm: take care to copy the space map roots before locking the superblock
  dm transaction manager: fix corruption due to non-atomic transaction commit
  ...
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/Kconfig 11
-rw-r--r-- drivers/md/Makefile 2
-rw-r--r-- drivers/md/dm-cache-block-types.h 11
-rw-r--r-- drivers/md/dm-cache-metadata.c 132
-rw-r--r-- drivers/md/dm-cache-metadata.h 15
-rw-r--r-- drivers/md/dm-cache-target.c 131
-rw-r--r-- drivers/md/dm-era-target.c 1746
-rw-r--r-- drivers/md/dm-mpath.c 219
-rw-r--r-- drivers/md/dm-table.c 21
-rw-r--r-- drivers/md/dm-thin-metadata.c 80
-rw-r--r-- drivers/md/dm-thin.c 263
-rw-r--r-- drivers/md/dm.c 24
-rw-r--r-- drivers/md/dm.h 2
-rw-r--r-- drivers/md/persistent-data/dm-bitset.c 10
-rw-r--r-- drivers/md/persistent-data/dm-bitset.h 1
-rw-r--r-- drivers/md/persistent-data/dm-block-manager.c 15
-rw-r--r-- drivers/md/persistent-data/dm-block-manager.h 3
-rw-r--r-- drivers/md/persistent-data/dm-transaction-manager.c 5
-rw-r--r-- drivers/md/persistent-data/dm-transaction-manager.h 17
19 files changed, 2233 insertions, 475 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 95ad936e6048..5bdedf6df153 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -285,6 +285,17 @@ config DM_CACHE_CLEANER
A simple cache policy that writes back all data to the
origin. Used when decommissioning a dm-cache.
+config DM_ERA
+ tristate "Era target (EXPERIMENTAL)"
+ depends on BLK_DEV_DM
+ default n
+ select DM_PERSISTENT_DATA
+ select DM_BIO_PRISON
+ ---help---
+ dm-era tracks which parts of a block device are written to
+ over time. Useful for maintaining cache coherency when using
+ vendor snapshots.
+
config DM_MIRROR
tristate "Mirror target"
depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index f26d83292579..a2da532b1c2b 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -14,6 +14,7 @@ dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
dm-cache-mq-y += dm-cache-policy-mq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
+dm-era-y += dm-era-target.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o
@@ -53,6 +54,7 @@ obj-$(CONFIG_DM_VERITY) += dm-verity.o
obj-$(CONFIG_DM_CACHE) += dm-cache.o
obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o
obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o
+obj-$(CONFIG_DM_ERA) += dm-era.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h
index bed4ad4e1b7c..aac0e2df06be 100644
--- a/drivers/md/dm-cache-block-types.h
+++ b/drivers/md/dm-cache-block-types.h
@@ -19,7 +19,6 @@
typedef dm_block_t __bitwise__ dm_oblock_t;
typedef uint32_t __bitwise__ dm_cblock_t;
-typedef dm_block_t __bitwise__ dm_dblock_t;
static inline dm_oblock_t to_oblock(dm_block_t b)
{
@@ -41,14 +40,4 @@ static inline uint32_t from_cblock(dm_cblock_t b)
return (__force uint32_t) b;
}
-static inline dm_dblock_t to_dblock(dm_block_t b)
-{
- return (__force dm_dblock_t) b;
-}
-
-static inline dm_block_t from_dblock(dm_dblock_t b)
-{
- return (__force dm_block_t) b;
-}
-
#endif /* DM_CACHE_BLOCK_TYPES_H */
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 9ef0752e8a08..4ead4ba60656 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -109,7 +109,7 @@ struct dm_cache_metadata {
dm_block_t discard_root;
sector_t discard_block_size;
- dm_dblock_t discard_nr_blocks;
+ dm_oblock_t discard_nr_blocks;
sector_t data_block_size;
dm_cblock_t cache_blocks;
@@ -120,6 +120,12 @@ struct dm_cache_metadata {
unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
size_t policy_hint_size;
struct dm_cache_statistics stats;
+
+ /*
+ * Reading the space map root can fail, so we read it into this
+ * buffer before the superblock is locked and updated.
+ */
+ __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
};
/*-------------------------------------------------------------------
@@ -260,11 +266,31 @@ static void __setup_mapping_info(struct dm_cache_metadata *cmd)
}
}
+static int __save_sm_root(struct dm_cache_metadata *cmd)
+{
+ int r;
+ size_t metadata_len;
+
+ r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
+ if (r < 0)
+ return r;
+
+ return dm_sm_copy_root(cmd->metadata_sm, &cmd->metadata_space_map_root,
+ metadata_len);
+}
+
+static void __copy_sm_root(struct dm_cache_metadata *cmd,
+ struct cache_disk_superblock *disk_super)
+{
+ memcpy(&disk_super->metadata_space_map_root,
+ &cmd->metadata_space_map_root,
+ sizeof(cmd->metadata_space_map_root));
+}
+
static int __write_initial_superblock(struct dm_cache_metadata *cmd)
{
int r;
struct dm_block *sblock;
- size_t metadata_len;
struct cache_disk_superblock *disk_super;
sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
@@ -272,12 +298,16 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
- r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
+ r = dm_tm_pre_commit(cmd->tm);
if (r < 0)
return r;
- r = dm_tm_pre_commit(cmd->tm);
- if (r < 0)
+ /*
+ * dm_sm_copy_root() can fail. So we need to do it before we start
+ * updating the superblock.
+ */
+ r = __save_sm_root(cmd);
+ if (r)
return r;
r = superblock_lock_zero(cmd, &sblock);
@@ -293,16 +323,13 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
disk_super->policy_hint_size = 0;
- r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
- metadata_len);
- if (r < 0)
- goto bad_locked;
+ __copy_sm_root(cmd, disk_super);
disk_super->mapping_root = cpu_to_le64(cmd->root);
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
- disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
+ disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
disk_super->cache_blocks = cpu_to_le32(0);
@@ -313,10 +340,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
disk_super->write_misses = cpu_to_le32(0);
return dm_tm_commit(cmd->tm, sblock);
-
-bad_locked:
- dm_bm_unlock(sblock);
- return r;
}
static int __format_metadata(struct dm_cache_metadata *cmd)
@@ -496,7 +519,7 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
cmd->hint_root = le64_to_cpu(disk_super->hint_root);
cmd->discard_root = le64_to_cpu(disk_super->discard_root);
cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size);
- cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks));
+ cmd->discard_nr_blocks = to_oblock(le64_to_cpu(disk_super->discard_nr_blocks));
cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
@@ -530,8 +553,9 @@ static int __begin_transaction_flags(struct dm_cache_metadata *cmd,
disk_super = dm_block_data(sblock);
update_flags(disk_super, mutator);
read_superblock_fields(cmd, disk_super);
+ dm_bm_unlock(sblock);
- return dm_bm_flush_and_unlock(cmd->bm, sblock);
+ return dm_bm_flush(cmd->bm);
}
static int __begin_transaction(struct dm_cache_metadata *cmd)
@@ -559,7 +583,6 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
flags_mutator mutator)
{
int r;
- size_t metadata_len;
struct cache_disk_superblock *disk_super;
struct dm_block *sblock;
@@ -577,8 +600,8 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
if (r < 0)
return r;
- r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
- if (r < 0)
+ r = __save_sm_root(cmd);
+ if (r)
return r;
r = superblock_lock(cmd, &sblock);
@@ -594,7 +617,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
- disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
+ disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
@@ -605,13 +628,7 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits);
disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses);
-
- r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
- metadata_len);
- if (r < 0) {
- dm_bm_unlock(sblock);
- return r;
- }
+ __copy_sm_root(cmd, disk_super);
return dm_tm_commit(cmd->tm, sblock);
}
@@ -771,15 +788,15 @@ out:
int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
sector_t discard_block_size,
- dm_dblock_t new_nr_entries)
+ dm_oblock_t new_nr_entries)
{
int r;
down_write(&cmd->root_lock);
r = dm_bitset_resize(&cmd->discard_info,
cmd->discard_root,
- from_dblock(cmd->discard_nr_blocks),
- from_dblock(new_nr_entries),
+ from_oblock(cmd->discard_nr_blocks),
+ from_oblock(new_nr_entries),
false, &cmd->discard_root);
if (!r) {
cmd->discard_block_size = discard_block_size;
@@ -792,28 +809,28 @@ int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
return r;
}
-static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
+static int __set_discard(struct dm_cache_metadata *cmd, dm_oblock_t b)
{
return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root,
- from_dblock(b), &cmd->discard_root);
+ from_oblock(b), &cmd->discard_root);
}
-static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
+static int __clear_discard(struct dm_cache_metadata *cmd, dm_oblock_t b)
{
return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root,
- from_dblock(b), &cmd->discard_root);
+ from_oblock(b), &cmd->discard_root);
}
-static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
+static int __is_discarded(struct dm_cache_metadata *cmd, dm_oblock_t b,
bool *is_discarded)
{
return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
- from_dblock(b), &cmd->discard_root,
+ from_oblock(b), &cmd->discard_root,
is_discarded);
}
static int __discard(struct dm_cache_metadata *cmd,
- dm_dblock_t dblock, bool discard)
+ dm_oblock_t dblock, bool discard)
{
int r;
@@ -826,7 +843,7 @@ static int __discard(struct dm_cache_metadata *cmd,
}
int dm_cache_set_discard(struct dm_cache_metadata *cmd,
- dm_dblock_t dblock, bool discard)
+ dm_oblock_t dblock, bool discard)
{
int r;
@@ -844,8 +861,8 @@ static int __load_discards(struct dm_cache_metadata *cmd,
dm_block_t b;
bool discard;
- for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
- dm_dblock_t dblock = to_dblock(b);
+ for (b = 0; b < from_oblock(cmd->discard_nr_blocks); b++) {
+ dm_oblock_t dblock = to_oblock(b);
if (cmd->clean_when_opened) {
r = __is_discarded(cmd, dblock, &discard);
@@ -1228,22 +1245,12 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
return 0;
}
-int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
+static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint)
{
+ struct dm_cache_metadata *cmd = context;
+ __le32 value = cpu_to_le32(hint);
int r;
- down_write(&cmd->root_lock);
- r = begin_hints(cmd, policy);
- up_write(&cmd->root_lock);
-
- return r;
-}
-
-static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
- uint32_t hint)
-{
- int r;
- __le32 value = cpu_to_le32(hint);
__dm_bless_for_disk(&value);
r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
@@ -1253,16 +1260,25 @@ static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
return r;
}
-int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock,
- uint32_t hint)
+static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
int r;
- if (!hints_array_initialized(cmd))
- return 0;
+ r = begin_hints(cmd, policy);
+ if (r) {
+ DMERR("begin_hints failed");
+ return r;
+ }
+
+ return policy_walk_mappings(policy, save_hint, cmd);
+}
+
+int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
+{
+ int r;
down_write(&cmd->root_lock);
- r = save_hint(cmd, cblock, hint);
+ r = write_hints(cmd, policy);
up_write(&cmd->root_lock);
return r;
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index cd906f14f98d..cd70a78623a3 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -72,14 +72,14 @@ dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd);
int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd,
sector_t discard_block_size,
- dm_dblock_t new_nr_entries);
+ dm_oblock_t new_nr_entries);
typedef int (*load_discard_fn)(void *context, sector_t discard_block_size,
- dm_dblock_t dblock, bool discarded);
+ dm_oblock_t dblock, bool discarded);
int dm_cache_load_discards(struct dm_cache_metadata *cmd,
load_discard_fn fn, void *context);
-int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_dblock_t dblock, bool discard);
+int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_oblock_t dblock, bool discard);
int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock);
int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock);
@@ -128,14 +128,7 @@ void dm_cache_dump(struct dm_cache_metadata *cmd);
* rather than querying the policy for each cblock, we let it walk its data
* structures and fill in the hints in whatever order it wishes.
*/
-
-int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p);
-
-/*
- * requests hints for every cblock and stores in the metadata device.
- */
-int dm_cache_save_hint(struct dm_cache_metadata *cmd,
- dm_cblock_t cblock, uint32_t hint);
+int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p);
/*
* Query method. Are all the blocks in the cache clean?
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 074b9c8e4cf0..1bf4a71919ec 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -237,9 +237,8 @@ struct cache {
/*
* origin_blocks entries, discarded if set.
*/
- dm_dblock_t discard_nr_blocks;
+ dm_oblock_t discard_nr_blocks;
unsigned long *discard_bitset;
- uint32_t discard_block_size; /* a power of 2 times sectors per block */
/*
* Rather than reconstructing the table line for the status we just
@@ -526,48 +525,33 @@ static dm_block_t block_div(dm_block_t b, uint32_t n)
return b;
}
-static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
-{
- uint32_t discard_blocks = cache->discard_block_size;
- dm_block_t b = from_oblock(oblock);
-
- if (!block_size_is_power_of_two(cache))
- discard_blocks = discard_blocks / cache->sectors_per_block;
- else
- discard_blocks >>= cache->sectors_per_block_shift;
-
- b = block_div(b, discard_blocks);
-
- return to_dblock(b);
-}
-
-static void set_discard(struct cache *cache, dm_dblock_t b)
+static void set_discard(struct cache *cache, dm_oblock_t b)
{
unsigned long flags;
atomic_inc(&cache->stats.discard_count);
spin_lock_irqsave(&cache->lock, flags);
- set_bit(from_dblock(b), cache->discard_bitset);
+ set_bit(from_oblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
}
-static void clear_discard(struct cache *cache, dm_dblock_t b)
+static void clear_discard(struct cache *cache, dm_oblock_t b)
{
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- clear_bit(from_dblock(b), cache->discard_bitset);
+ clear_bit(from_oblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
}
-static bool is_discarded(struct cache *cache, dm_dblock_t b)
+static bool is_discarded(struct cache *cache, dm_oblock_t b)
{
int r;
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- r = test_bit(from_dblock(b), cache->discard_bitset);
+ r = test_bit(from_oblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
return r;
@@ -579,8 +563,7 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
unsigned long flags;
spin_lock_irqsave(&cache->lock, flags);
- r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
- cache->discard_bitset);
+ r = test_bit(from_oblock(b), cache->discard_bitset);
spin_unlock_irqrestore(&cache->lock, flags);
return r;
@@ -705,7 +688,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
check_if_tick_bio_needed(cache, bio);
remap_to_origin(cache, bio);
if (bio_data_dir(bio) == WRITE)
- clear_discard(cache, oblock_to_dblock(cache, oblock));
+ clear_discard(cache, oblock);
}
static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
@@ -715,7 +698,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
remap_to_cache(cache, bio, cblock);
if (bio_data_dir(bio) == WRITE) {
set_dirty(cache, oblock, cblock);
- clear_discard(cache, oblock_to_dblock(cache, oblock));
+ clear_discard(cache, oblock);
}
}
@@ -1288,14 +1271,14 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
- cache->discard_block_size);
+ cache->sectors_per_block);
dm_block_t end_block = bio_end_sector(bio);
dm_block_t b;
- end_block = block_div(end_block, cache->discard_block_size);
+ end_block = block_div(end_block, cache->sectors_per_block);
for (b = start_block; b < end_block; b++)
- set_discard(cache, to_dblock(b));
+ set_discard(cache, to_oblock(b));
bio_endio(bio, 0);
}
@@ -2171,35 +2154,6 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
return 0;
}
-/*
- * We want the discard block size to be a power of two, at least the size
- * of the cache block size, and have no more than 2^14 discard blocks
- * across the origin.
- */
-#define MAX_DISCARD_BLOCKS (1 << 14)
-
-static bool too_many_discard_blocks(sector_t discard_block_size,
- sector_t origin_size)
-{
- (void) sector_div(origin_size, discard_block_size);
-
- return origin_size > MAX_DISCARD_BLOCKS;
-}
-
-static sector_t calculate_discard_block_size(sector_t cache_block_size,
- sector_t origin_size)
-{
- sector_t discard_block_size;
-
- discard_block_size = roundup_pow_of_two(cache_block_size);
-
- if (origin_size)
- while (too_many_discard_blocks(discard_block_size, origin_size))
- discard_block_size *= 2;
-
- return discard_block_size;
-}
-
#define DEFAULT_MIGRATION_THRESHOLD 2048
static int cache_create(struct cache_args *ca, struct cache **result)
@@ -2321,16 +2275,13 @@ static int cache_create(struct cache_args *ca, struct cache **result)
}
clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
- cache->discard_block_size =
- calculate_discard_block_size(cache->sectors_per_block,
- cache->origin_sectors);
- cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
- cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
+ cache->discard_nr_blocks = cache->origin_blocks;
+ cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks));
if (!cache->discard_bitset) {
*error = "could not allocate discard bitset";
goto bad;
}
- clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
+ clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks));
cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(cache->copier)) {
@@ -2614,16 +2565,16 @@ static int write_discard_bitset(struct cache *cache)
{
unsigned i, r;
- r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
- cache->discard_nr_blocks);
+ r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block,
+ cache->origin_blocks);
if (r) {
DMERR("could not resize on-disk discard bitset");
return r;
}
- for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
- r = dm_cache_set_discard(cache->cmd, to_dblock(i),
- is_discarded(cache, to_dblock(i)));
+ for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) {
+ r = dm_cache_set_discard(cache->cmd, to_oblock(i),
+ is_discarded(cache, to_oblock(i)));
if (r)
return r;
}
@@ -2631,30 +2582,6 @@ static int write_discard_bitset(struct cache *cache)
return 0;
}
-static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock,
- uint32_t hint)
-{
- struct cache *cache = context;
- return dm_cache_save_hint(cache->cmd, cblock, hint);
-}
-
-static int write_hints(struct cache *cache)
-{
- int r;
-
- r = dm_cache_begin_hints(cache->cmd, cache->policy);
- if (r) {
- DMERR("dm_cache_begin_hints failed");
- return r;
- }
-
- r = policy_walk_mappings(cache->policy, save_hint, cache);
- if (r)
- DMERR("policy_walk_mappings failed");
-
- return r;
-}
-
/*
* returns true on success
*/
@@ -2672,7 +2599,7 @@ static bool sync_metadata(struct cache *cache)
save_stats(cache);
- r3 = write_hints(cache);
+ r3 = dm_cache_write_hints(cache->cmd, cache->policy);
if (r3)
DMERR("could not write hints");
@@ -2720,16 +2647,14 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
}
static int load_discard(void *context, sector_t discard_block_size,
- dm_dblock_t dblock, bool discard)
+ dm_oblock_t oblock, bool discard)
{
struct cache *cache = context;
- /* FIXME: handle mis-matched block size */
-
if (discard)
- set_discard(cache, dblock);
+ set_discard(cache, oblock);
else
- clear_discard(cache, dblock);
+ clear_discard(cache, oblock);
return 0;
}
@@ -3120,8 +3045,8 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
/*
* FIXME: these limits may be incompatible with the cache device
*/
- limits->max_discard_sectors = cache->discard_block_size * 1024;
- limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
+ limits->max_discard_sectors = cache->sectors_per_block;
+ limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT;
}
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -3145,7 +3070,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
new file mode 100644
index 000000000000..414dad4cb49b
--- /dev/null
+++ b/drivers/md/dm-era-target.c
@@ -0,0 +1,1746 @@
+#include "dm.h"
+#include "persistent-data/dm-transaction-manager.h"
+#include "persistent-data/dm-bitset.h"
+#include "persistent-data/dm-space-map.h"
+
+#include <linux/dm-io.h>
+#include <linux/dm-kcopyd.h>
+#include <linux/init.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#define DM_MSG_PREFIX "era"
+
+#define SUPERBLOCK_LOCATION 0
+#define SUPERBLOCK_MAGIC 2126579579
+#define SUPERBLOCK_CSUM_XOR 146538381
+#define MIN_ERA_VERSION 1
+#define MAX_ERA_VERSION 1
+#define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION
+#define MIN_BLOCK_SIZE 8
+
+/*----------------------------------------------------------------
+ * Writeset
+ *--------------------------------------------------------------*/
+struct writeset_metadata {
+ uint32_t nr_bits;
+ dm_block_t root;
+};
+
+struct writeset {
+ struct writeset_metadata md;
+
+ /*
+ * An in core copy of the bits to save constantly doing look ups on
+ * disk.
+ */
+ unsigned long *bits;
+};
+
+/*
+ * This does not free off the on disk bitset as this will normally be done
+ * after digesting into the era array.
+ */
+static void writeset_free(struct writeset *ws)
+{
+ vfree(ws->bits);
+}
+
+static int setup_on_disk_bitset(struct dm_disk_bitset *info,
+ unsigned nr_bits, dm_block_t *root)
+{
+ int r;
+
+ r = dm_bitset_empty(info, root);
+ if (r)
+ return r;
+
+ return dm_bitset_resize(info, *root, 0, nr_bits, false, root);
+}
+
+static size_t bitset_size(unsigned nr_bits)
+{
+ return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG);
+}
+
+/*
+ * Allocates memory for the in core bitset.
+ */
+static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
+{
+ ws->md.nr_bits = nr_blocks;
+ ws->md.root = INVALID_WRITESET_ROOT;
+ ws->bits = vzalloc(bitset_size(nr_blocks));
+ if (!ws->bits) {
+ DMERR("%s: couldn't allocate in memory bitset", __func__);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Wipes the in-core bitset, and creates a new on disk bitset.
+ */
+static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws)
+{
+ int r;
+
+ memset(ws->bits, 0, bitset_size(ws->md.nr_bits));
+
+ r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
+ if (r) {
+ DMERR("%s: setup_on_disk_bitset failed", __func__);
+ return r;
+ }
+
+ return 0;
+}
+
+static bool writeset_marked(struct writeset *ws, dm_block_t block)
+{
+ return test_bit(block, ws->bits);
+}
+
+static int writeset_marked_on_disk(struct dm_disk_bitset *info,
+ struct writeset_metadata *m, dm_block_t block,
+ bool *result)
+{
+ dm_block_t old = m->root;
+
+ /*
+ * The bitset was flushed when it was archived, so we know there'll
+ * be no change to the root.
+ */
+ int r = dm_bitset_test_bit(info, m->root, block, &m->root, result);
+ if (r) {
+ DMERR("%s: dm_bitset_test_bit failed", __func__);
+ return r;
+ }
+
+ BUG_ON(m->root != old);
+
+ return r;
+}
+
+/*
+ * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was.
+ */
+static int writeset_test_and_set(struct dm_disk_bitset *info,
+ struct writeset *ws, uint32_t block)
+{
+ int r;
+
+ if (!test_and_set_bit(block, ws->bits)) {
+ r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root);
+ if (r) {
+ /* FIXME: fail mode */
+ return r;
+ }
+
+ return 0;
+ }
+
+ return 1;
+}
+
+/*----------------------------------------------------------------
+ * On disk metadata layout
+ *--------------------------------------------------------------*/
+#define SPACE_MAP_ROOT_SIZE 128
+#define UUID_LEN 16
+
+struct writeset_disk {
+ __le32 nr_bits;
+ __le64 root;
+} __packed;
+
+struct superblock_disk {
+ __le32 csum;
+ __le32 flags;
+ __le64 blocknr;
+
+ __u8 uuid[UUID_LEN];
+ __le64 magic;
+ __le32 version;
+
+ __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+
+ __le32 data_block_size;
+ __le32 metadata_block_size;
+ __le32 nr_blocks;
+
+ __le32 current_era;
+ struct writeset_disk current_writeset;
+
+ /*
+ * Only these two fields are valid within the metadata snapshot.
+ */
+ __le64 writeset_tree_root;
+ __le64 era_array_root;
+
+ __le64 metadata_snap;
+} __packed;
+
+/*----------------------------------------------------------------
+ * Superblock validation
+ *--------------------------------------------------------------*/
+static void sb_prepare_for_write(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t sb_block_size)
+{
+ struct superblock_disk *disk = dm_block_data(b);