author    Joe Thornber <ejt@redhat.com>    2016-12-15 04:57:31 -0500
committer Mike Snitzer <snitzer@redhat.com>    2017-03-07 13:28:31 -0500
commit    b29d4986d0da1a27cd35917cdb433672f5c95d7f (patch)
tree      a5d94b86cf1eb759bfef5761015135d747e80561 /drivers
parent    742c8fdc31e820503f9267070311d894978d1349 (diff)
dm cache: significant rework to leverage dm-bio-prison-v2
The cache policy interfaces have been updated to work well with the new bio-prison v2 interface's ability to queue work immediately (promotion, demotion, etc) -- overriding benefit being reduced latency on processing IO through the cache. Previously such work would be left for the DM cache core to queue on various lists and then process in batches later -- this caused a serious delay in latency for IO driven by the cache.

The background tracker code was factored out so that all cache policies can make use of it.

Also, the "cleaner" policy has been removed and is now a variant of the smq policy that simply disallows migrations.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
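To make the reworked flow concrete, here is a minimal sketch (not code from this patch) of how the cache target can consume policy work under the new interface, using the policy_lookup_with_work(), policy_get_background_work() and policy_complete_background_work() wrappers added to dm-cache-policy-internal.h below; queue_background_work() and do_migration() are hypothetical helpers standing in for the target's real migration machinery:

/*
 * Illustrative only: a lookup can now hand back promotion/demotion work
 * immediately, and the target drains further queued work when idle.
 */
static void example_map_path(struct cache *cache, dm_oblock_t oblock,
			     int data_dir, bool fast_copy)
{
	dm_cblock_t cblock;
	struct policy_work *work = NULL;

	if (!policy_lookup_with_work(cache->policy, oblock, &cblock,
				     data_dir, fast_copy, &work) && work)
		queue_background_work(cache, work);	/* hypothetical helper */

	while (!policy_get_background_work(cache->policy, true, &work)) {
		do_migration(cache, work);		/* hypothetical helper */
		policy_complete_background_work(cache->policy, work, true);
	}
}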
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/md/Kconfig                            8
-rw-r--r--  drivers/md/Makefile                           5
-rw-r--r--  drivers/md/dm-cache-background-tracker.c    238
-rw-r--r--  drivers/md/dm-cache-background-tracker.h     46
-rw-r--r--  drivers/md/dm-cache-metadata.h                2
-rw-r--r--  drivers/md/dm-cache-policy-cleaner.c        469
-rw-r--r--  drivers/md/dm-cache-policy-internal.h        76
-rw-r--r--  drivers/md/dm-cache-policy-smq.c            821
-rw-r--r--  drivers/md/dm-cache-policy.h                187
-rw-r--r--  drivers/md/dm-cache-target.c               2469
10 files changed, 1922 insertions, 2399 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b7767da50c26..982cd0626bc7 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -325,14 +325,6 @@ config DM_CACHE_SMQ
of less memory utilization, improved performance and increased
adaptability in the face of changing workloads.
-config DM_CACHE_CLEANER
- tristate "Cleaner Cache Policy (EXPERIMENTAL)"
- depends on DM_CACHE
- default y
- ---help---
- A simple cache policy that writes back all data to the
- origin. Used when decommissioning a dm-cache.
-
config DM_ERA
tristate "Era target (EXPERIMENTAL)"
depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index d378b1db7852..2801b2fb452d 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -13,9 +13,9 @@ dm-log-userspace-y \
+= dm-log-userspace-base.o dm-log-userspace-transfer.o
dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o
dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
-dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
+dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
+ dm-cache-background-tracker.o
dm-cache-smq-y += dm-cache-policy-smq.o
-dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y += dm-era-target.o
dm-verity-y += dm-verity-target.o
md-mod-y += md.o bitmap.o
@@ -57,7 +57,6 @@ obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
obj-$(CONFIG_DM_CACHE) += dm-cache.o
obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
-obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o
obj-$(CONFIG_DM_ERA) += dm-era.o
obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c
new file mode 100644
index 000000000000..9b1afdfb13f0
--- /dev/null
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2017 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-background-tracker.h"
+
+/*----------------------------------------------------------------*/
+
+#define DM_MSG_PREFIX "dm-background-tracker"
+
+struct bt_work {
+ struct list_head list;
+ struct rb_node node;
+ struct policy_work work;
+};
+
+struct background_tracker {
+ unsigned max_work;
+ atomic_t pending_promotes;
+ atomic_t pending_writebacks;
+ atomic_t pending_demotes;
+
+ struct list_head issued;
+ struct list_head queued;
+ struct rb_root pending;
+
+ struct kmem_cache *work_cache;
+};
+
+struct background_tracker *btracker_create(unsigned max_work)
+{
+ struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
+
+ b->max_work = max_work;
+ atomic_set(&b->pending_promotes, 0);
+ atomic_set(&b->pending_writebacks, 0);
+ atomic_set(&b->pending_demotes, 0);
+
+ INIT_LIST_HEAD(&b->issued);
+ INIT_LIST_HEAD(&b->queued);
+
+ b->pending = RB_ROOT;
+ b->work_cache = KMEM_CACHE(bt_work, 0);
+ if (!b->work_cache) {
+ DMERR("couldn't create mempool for background work items");
+ kfree(b);
+ b = NULL;
+ }
+
+ return b;
+}
+EXPORT_SYMBOL_GPL(btracker_create);
+
+void btracker_destroy(struct background_tracker *b)
+{
+ kmem_cache_destroy(b->work_cache);
+ kfree(b);
+}
+EXPORT_SYMBOL_GPL(btracker_destroy);
+
+static int cmp_oblock(dm_oblock_t lhs, dm_oblock_t rhs)
+{
+ if (from_oblock(lhs) < from_oblock(rhs))
+ return -1;
+
+ if (from_oblock(rhs) < from_oblock(lhs))
+ return 1;
+
+ return 0;
+}
+
+static bool __insert_pending(struct background_tracker *b,
+ struct bt_work *nw)
+{
+ int cmp;
+ struct bt_work *w;
+ struct rb_node **new = &b->pending.rb_node, *parent = NULL;
+
+ while (*new) {
+ w = container_of(*new, struct bt_work, node);
+
+ parent = *new;
+ cmp = cmp_oblock(w->work.oblock, nw->work.oblock);
+ if (cmp < 0)
+ new = &((*new)->rb_left);
+
+ else if (cmp > 0)
+ new = &((*new)->rb_right);
+
+ else
+ /* already present */
+ return false;
+ }
+
+ rb_link_node(&nw->node, parent, new);
+ rb_insert_color(&nw->node, &b->pending);
+
+ return true;
+}
+
+static struct bt_work *__find_pending(struct background_tracker *b,
+ dm_oblock_t oblock)
+{
+ int cmp;
+ struct bt_work *w;
+ struct rb_node **new = &b->pending.rb_node;
+
+ while (*new) {
+ w = container_of(*new, struct bt_work, node);
+
+ cmp = cmp_oblock(w->work.oblock, oblock);
+ if (cmp < 0)
+ new = &((*new)->rb_left);
+
+ else if (cmp > 0)
+ new = &((*new)->rb_right);
+
+ else
+ break;
+ }
+
+ return *new ? w : NULL;
+}
+
+
+static void update_stats(struct background_tracker *b, struct policy_work *w, int delta)
+{
+ switch (w->op) {
+ case POLICY_PROMOTE:
+ atomic_add(delta, &b->pending_promotes);
+ break;
+
+ case POLICY_DEMOTE:
+ atomic_add(delta, &b->pending_demotes);
+ break;
+
+ case POLICY_WRITEBACK:
+ atomic_add(delta, &b->pending_writebacks);
+ break;
+ }
+}
+
+unsigned btracker_nr_writebacks_queued(struct background_tracker *b)
+{
+ return atomic_read(&b->pending_writebacks);
+}
+EXPORT_SYMBOL_GPL(btracker_nr_writebacks_queued);
+
+unsigned btracker_nr_demotions_queued(struct background_tracker *b)
+{
+ return atomic_read(&b->pending_demotes);
+}
+EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued);
+
+static bool max_work_reached(struct background_tracker *b)
+{
+ // FIXME: finish
+ return false;
+}
+
+int btracker_queue(struct background_tracker *b,
+ struct policy_work *work,
+ struct policy_work **pwork)
+{
+ struct bt_work *w;
+
+ if (pwork)
+ *pwork = NULL;
+
+ if (max_work_reached(b))
+ return -ENOMEM;
+
+ w = kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
+ if (!w)
+ return -ENOMEM;
+
+ memcpy(&w->work, work, sizeof(*work));
+
+ if (!__insert_pending(b, w)) {
+ /*
+ * There was a race, we'll just ignore this second
+ * bit of work for the same oblock.
+ */
+ kmem_cache_free(b->work_cache, w);
+ return -EINVAL;
+ }
+
+ if (pwork) {
+ *pwork = &w->work;
+ list_add(&w->list, &b->issued);
+ } else
+ list_add(&w->list, &b->queued);
+ update_stats(b, &w->work, 1);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btracker_queue);
+
+/*
+ * Returns -ENODATA if there's no work.
+ */
+int btracker_issue(struct background_tracker *b, struct policy_work **work)
+{
+ struct bt_work *w;
+
+ if (list_empty(&b->queued))
+ return -ENODATA;
+
+ w = list_first_entry(&b->queued, struct bt_work, list);
+ list_move(&w->list, &b->issued);
+ *work = &w->work;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btracker_issue);
+
+void btracker_complete(struct background_tracker *b,
+ struct policy_work *op)
+{
+ struct bt_work *w = container_of(op, struct bt_work, work);
+
+ update_stats(b, &w->work, -1);
+ rb_erase(&w->node, &b->pending);
+ list_del(&w->list);
+ kmem_cache_free(b->work_cache, w);
+}
+EXPORT_SYMBOL_GPL(btracker_complete);
+
+bool btracker_promotion_already_present(struct background_tracker *b,
+ dm_oblock_t oblock)
+{
+ return __find_pending(b, oblock) != NULL;
+}
+EXPORT_SYMBOL_GPL(btracker_promotion_already_present);
+
+/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-cache-background-tracker.h b/drivers/md/dm-cache-background-tracker.h
new file mode 100644
index 000000000000..27ab90dbc275
--- /dev/null
+++ b/drivers/md/dm-cache-background-tracker.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2017 Red Hat. All rights reserved.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_CACHE_BACKGROUND_WORK_H
+#define DM_CACHE_BACKGROUND_WORK_H
+
+#include <linux/vmalloc.h>
+#include "dm-cache-policy.h"
+
+/*----------------------------------------------------------------*/
+
+struct background_work;
+struct background_tracker;
+
+/*
+ * FIXME: discuss lack of locking in all methods.
+ */
+struct background_tracker *btracker_create(unsigned max_work);
+void btracker_destroy(struct background_tracker *b);
+
+unsigned btracker_nr_writebacks_queued(struct background_tracker *b);
+unsigned btracker_nr_demotions_queued(struct background_tracker *b);
+
+/*
+ * returns -EINVAL iff the work is already queued. -ENOMEM if the work
+ * couldn't be queued for another reason.
+ */
+int btracker_queue(struct background_tracker *b,
+ struct policy_work *work,
+ struct policy_work **pwork);
+
+/*
+ * Returns -ENODATA if there's no work.
+ */
+int btracker_issue(struct background_tracker *b, struct policy_work **work);
+void btracker_complete(struct background_tracker *b,
+ struct policy_work *op);
+bool btracker_promotion_already_present(struct background_tracker *b,
+ dm_oblock_t oblock);
+
+/*----------------------------------------------------------------*/
+
+#endif
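As a usage note for the header above, a policy would roughly drive the tracker as follows (a sketch against the declarations just added, not code from dm-cache-policy-smq.c; my_policy, the max_work value of 4096 and the struct policy_work field set are illustrative assumptions):

/*
 * Sketch of the intended call pattern for the background tracker.
 */
struct my_policy {
	struct background_tracker *bg_work;
};

static int my_policy_init(struct my_policy *p)
{
	p->bg_work = btracker_create(4096);	/* assumed max_work budget */
	return p->bg_work ? 0 : -ENOMEM;
}

static void my_queue_demotion(struct my_policy *p, dm_oblock_t oblock,
			      dm_cblock_t cblock)
{
	struct policy_work work = {
		.op = POLICY_DEMOTE,
		.oblock = oblock,
		.cblock = cblock,	/* field assumed from dm-cache-policy.h */
	};

	/*
	 * btracker_queue() copies the work, so a stack copy is fine.
	 * It returns -EINVAL if work for this oblock is already pending.
	 */
	btracker_queue(p->bg_work, &work, NULL);
}

static int my_get_background_work(struct my_policy *p,
				  struct policy_work **result)
{
	/* -ENODATA when nothing is queued. */
	return btracker_issue(p->bg_work, result);
}

static void my_complete_background_work(struct my_policy *p,
					struct policy_work *work)
{
	btracker_complete(p->bg_work, work);
}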
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 4f07c08cf107..179ed5bf81a3 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -50,6 +50,8 @@
#define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL
#define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL
+struct dm_cache_metadata;
+
/*
* Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on
* failure. If reopening then features must match.
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
deleted file mode 100644
index 2e8a8f1d8358..000000000000
--- a/drivers/md/dm-cache-policy-cleaner.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (C) 2012 Red Hat. All rights reserved.
- *
- * writeback cache policy supporting flushing out dirty cache blocks.
- *
- * This file is released under the GPL.
- */
-
-#include "dm-cache-policy.h"
-#include "dm.h"
-
-#include <linux/hash.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-/*----------------------------------------------------------------*/
-
-#define DM_MSG_PREFIX "cache cleaner"
-
-/* Cache entry struct. */
-struct wb_cache_entry {
- struct list_head list;
- struct hlist_node hlist;
-
- dm_oblock_t oblock;
- dm_cblock_t cblock;
- bool dirty:1;
- bool pending:1;
-};
-
-struct hash {
- struct hlist_head *table;
- dm_block_t hash_bits;
- unsigned nr_buckets;
-};
-
-struct policy {
- struct dm_cache_policy policy;
- spinlock_t lock;
-
- struct list_head free;
- struct list_head clean;
- struct list_head clean_pending;
- struct list_head dirty;
-
- /*
- * We know exactly how many cblocks will be needed,
- * so we can allocate them up front.
- */
- dm_cblock_t cache_size, nr_cblocks_allocated;
- struct wb_cache_entry *cblocks;
- struct hash chash;
-};
-
-/*----------------------------------------------------------------------------*/
-
-/*
- * Low-level functions.
- */
-static unsigned next_power(unsigned n, unsigned min)
-{
- return roundup_pow_of_two(max(n, min));
-}
-
-static struct policy *to_policy(struct dm_cache_policy *p)
-{
- return container_of(p, struct policy, policy);
-}
-
-static struct list_head *list_pop(struct list_head *q)
-{
- struct list_head *r = q->next;
-
- list_del(r);
-
- return r;
-}
-
-/*----------------------------------------------------------------------------*/
-
-/* Allocate/free various resources. */
-static int alloc_hash(struct hash *hash, unsigned elts)
-{
- hash->nr_buckets = next_power(elts >> 4, 16);
- hash->hash_bits = __ffs(hash->nr_buckets);
- hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets);
-
- return hash->table ? 0 : -ENOMEM;
-}
-
-static void free_hash(struct hash *hash)
-{
- vfree(hash->table);
-}
-
-static int alloc_cache_blocks_with_hash(struct policy *p, dm_cblock_t cache_size)
-{
- int r = -ENOMEM;
-
- p->cblocks = vzalloc(sizeof(*p->cblocks) * from_cblock(cache_size));
- if (p->cblocks) {
- unsigned u = from_cblock(cache_size);
-
- while (u--)
- list_add(&p->cblocks[u].list, &p->free);
-
- p->nr_cblocks_allocated = 0;
-
- /* Cache entries hash. */
- r = alloc_hash(&p->chash, from_cblock(cache_size));
- if (r)
- vfree(p->cblocks);
- }
-
- return r;
-}
-
-static void free_cache_blocks_and_hash(struct policy *p)
-{
- free_hash(&p->chash);
- vfree(p->cblocks);
-}
-
-static struct wb_cache_entry *alloc_cache_entry(struct policy *p)
-{
- struct wb_cache_entry *e;
-
- BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size));
-
- e = list_entry(list_pop(&p->free), struct wb_cache_entry, list);
- p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1);
-
- return e;
-}
-
-/*----------------------------------------------------------------------------*/
-
-/* Hash functions (lookup, insert, remove). */
-static struct wb_cache_entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock)
-{
- struct hash *hash = &p->chash;
- unsigned h = hash_64(from_oblock(oblock), hash->hash_bits);
- struct wb_cache_entry *cur;
- struct hlist_head *bucket = &hash->table[h];
-
- hlist_for_each_entry(cur, bucket, hlist) {
- if (cur->oblock == oblock) {
- /* Move upfront bucket for faster access. */
- hlist_del(&cur->hlist);
- hlist_add_head(&cur->hlist, bucket);
- return cur;
- }
- }
-
- return NULL;
-}
-
-static void insert_cache_hash_entry(struct policy *p, struct wb_cache_entry *e)
-{
- unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits);
-
- hlist_add_head(&e->hlist, &p->chash.table[h]);
-}
-
-static void remove_cache_hash_entry(struct wb_cache_entry *e)
-{
- hlist_del(&e->hlist);
-}
-
-/* Public interface (see dm-cache-policy.h */
-static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
- bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_locker *locker,
- struct policy_result *result)
-{
- struct policy *p = to_policy(pe);
- struct wb_cache_entry *e;
- unsigned long flags;
-
- result->op = POLICY_MISS;
-
- if (can_block)
- spin_lock_irqsave(&p->lock, flags);
-
- else if (!spin_trylock_irqsave(&p->lock, flags))
- return -EWOULDBLOCK;
-
- e = lookup_cache_entry(p, oblock);
- if (e) {
- result->op = POLICY_HIT;
- result->cblock = e->cblock;
-
- }
-
- spin_unlock_irqrestore(&p->lock, flags);
-
- return 0;
-}
-
-static int wb_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock)
-{
- int r;
- struct policy *p = to_policy(pe);
- struct wb_cache_entry *e;
- unsigned long flags;
-
- if (!spin_trylock_irqsave(&p->lock, flags))
- return -EWOULDBLOCK;
-
- e = lookup_cache_entry(p, oblock);
- if (e) {
- *cblock = e->cblock;
- r = 0;
-
- } else
- r = -ENOENT;
-
- spin_unlock_irqrestore(&p->lock, flags);
-
- return r;
-}
-
-static void __set_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock, bool set)
-{
- struct policy *p = to_policy(pe);
- struct wb_cache_entry *e;
-
- e = lookup_cache_entry(p, oblock);
- BUG_ON(!e);
-
- if (set) {
- if (!e->dirty) {
- e->dirty = true;
- list_move(&e->list, &p->dirty);
- }
-
- } else {
- if (e->dirty) {
- e->pending = false;
- e->dirty = false;
- list_move(&e->list, &p->clean);
- }
- }
-}
-
-static void wb_set_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
-{
- struct policy *p = to_policy(pe);
- unsigned long flags;
-
- spin_lock_irqsave(&p->lock, flags);
- __set_clear_dirty(pe, oblock, true);
- spin_unlock_irqrestore(&p->lock, flags);
-}
-
-static void wb_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
-{
- struct policy *p = to_policy(pe);
- unsigned long flags;
-
- spin_lock_irqsave(&p->lock, flags);
- __set_clear_dirty(pe, oblock, false);
- spin_unlock_irqrestore(&p->lock, flags);
-}
-
-static void add_cache_entry(struct policy *p, struct wb_cache_entry *e)
-{
- insert_cache_hash_entry(p, e);
- if (e->dirty)
- list_add(&e->list, &p->dirty);
- else
- list_add(&e->list, &p->clean);
-}
-
-static int wb_load_mapping(struct dm_cache_policy *pe,
- dm_oblock_t oblock, dm_cblock_t cblock,
- uint32_t hint, bool hint_valid)
-{
- int r;
- struct policy *p = to_policy(pe);
- struct wb_cache_entry *e = alloc_cache_entry(p);
-
- if (e) {
- e->cblock = cblock;
- e->oblock = oblock;
- e->dirty = false; /* blocks default to clean */
- add_cache_entry(p, e);
- r = 0;
-
- } else
- r = -ENOMEM;
-
- return r;
-}
-
-static void wb_destroy(struct dm_cache_policy *pe)
-{
- struct policy *p = to_policy(pe);
-
- free_cache_blocks_and_hash(p);
- kfree(p);
-}
-
-static struct wb_cache_entry *__wb_force_remove_mapping(struct policy *p, dm_oblock_t oblock)
-{
- struct wb_cache_entry *r = lookup_cache_entry(p, oblock);
-
- BUG_ON(!r);
-
- remove_cache_hash_entry(r);
- list_del(&r->list);
-
- return r;
-}
-
-static void wb_remove_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock)
-{
- struct policy *p = to_policy(pe);
- struct wb_cache_entry *e;
- unsigned long flags;
-
- spin_lock_irqsave(&p->lock, flags);
- e = __wb_force_remove_mapping(p, oblock);
- list_add_tail(&e->list, &p->free);
- BUG_ON(!from_cblock(p->nr_cblocks_allocated));
- p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) - 1);
- spin_unlock_irqrestore(&p->lock, flags);
-}
-
-static void wb_force_mapping(struct dm_cache_policy *pe,
- dm_oblock_t current_oblock, dm_oblock_t oblock)
-{
- struct policy *p = to_policy(pe);
- struct wb_cache_entry *e;
- unsigned long flags;
-
- spin_lock_irqsave(&p->lock, flags);
- e = __wb_force_remove_mapping(p, current_oblock);
- e->oblock = oblock;
- add_cache_entry(p, e);
- spin_unlock_irqrestore(&p->lock, flags);
-}
-
-static struct wb_cache_entry *get_next_dirty_entry(struct policy *p)
-{
- struct list_head *l;
- struct wb_cache_entry *r;
-
- if (list_empty(&p->dirty))
- return NULL;
-
- l = list_pop(&p->dirty);
- r = container_of(l, struct wb_cache_entry, list);
- list_add(l, &p->clean_pending);
-
- return r;
-}
-
-static int wb_writeback_work(struct dm_cache_policy *pe,
- dm_oblock_t *oblock,
- dm_cblock_t *cblock,
- bool critical_only)
-{
- int r = -ENOENT;
- struct policy *p = to_policy(pe);
- struct wb_cache_entry *e;
- unsigned long flags;
-
- spin_lock_irqsave(&p->lock, flags);
-
- e = get_next_dirty_entry(p);
- if (e) {
- *oblock = e->oblock;
- *cblock = e->cblock;
- r = 0;
- }
-
- spin_unlock_irqrestore(&p->lock, flags);
-
- return r;
-}
-
-static dm_cblock_t wb_residency(struct dm_cache_policy *pe)
-{
- return to_policy(pe)->nr_cblocks_allocated;
-}
-
-/* Init the policy plugin interface function pointers. */
-static void init_policy_functions(struct policy *p)
-{
- p->policy.destroy = wb_destroy;
- p->policy.map = wb_map;
- p->policy.lookup = wb_lookup;
- p->policy.set_dirty = wb_set_dirty;
- p->policy.clear_dirty = wb_clear_dirty;
- p->policy.load_mapping = wb_load_mapping;
- p->policy.get_hint = NULL;
- p->policy.remove_mapping = wb_remove_mapping;
- p->policy.writeback_work = wb_writeback_work;
- p->policy.force_mapping = wb_force_mapping;
- p->policy.residency = wb_residency;
- p->policy.tick = NULL;
-}
-
-static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
- sector_t origin_size,
- sector_t cache_block_size)
-{
- int r;
- struct policy *p = kzalloc(sizeof(*p), GFP_KERNEL);
-
- if (!p)
- return NULL;
-
- init_policy_functions(p);
- INIT_LIST_HEAD(&p->free);
- INIT_LIST_HEAD(&p->clean);
- INIT_LIST_HEAD(&p->clean_pending);
- INIT_LIST_HEAD(&p->dirty);
-
- p->cache_size = cache_size;
- spin_lock_init(&p->lock);
-
- /* Allocate cache entry structs and add them to free list. */
- r = alloc_cache_blocks_with_hash(p, cache_size);
- if (!r)
- return &p->policy;
-
- kfree(p);
-
- return NULL;
-}
-/*----------------------------------------------------------------------------*/
-
-static struct dm_cache_policy_type wb_policy_type = {
- .name = "cleaner",
- .version = {1, 0, 0},
- .hint_size = 4,
- .owner = THIS_MODULE,
- .create = wb_create
-};
-
-static int __init wb_init(void)
-{
- int r = dm_cache_policy_register(&wb_policy_type);
-
- if (r < 0)
- DMERR("register failed %d", r);
- else
- DMINFO("version %u.%u.%u loaded",
- wb_policy_type.version[0],
- wb_policy_type.version[1],
- wb_policy_type.version[2]);
-
- return r;
-}
-
-static void __exit wb_exit(void)
-{
- dm_cache_policy_unregister(&wb_policy_type);
-}
-
-module_init(wb_init);
-module_exit(wb_exit);
-
-MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("cleaner cache policy");
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index 808ee0e2b2c4..56f0a23f698c 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -12,70 +12,65 @@
/*----------------------------------------------------------------*/
-/*
- * Little inline functions that simplify calling the policy methods.
- */
-static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
- bool can_block, bool can_migrate, bool discarded_oblock,
- struct bio *bio, struct policy_locker *locker,
- struct policy_result *result)
+static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
+ int data_dir, bool fast_copy, bool *background_queued)
{
- return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result);
+ return p->lookup(p, oblock, cblock, data_dir, fast_copy, background_queued);
}
-static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
+static inline int policy_lookup_with_work(struct dm_cache_policy *p,
+ dm_oblock_t oblock, dm_cblock_t *cblock,
+ int data_dir, bool fast_copy,
+ struct policy_work **work)
{
- BUG_ON(!p->lookup);
- return p->lookup(p, oblock, cblock);
-}
+ if (!p->lookup_with_work) {
+ *work = NULL;
+ return p->lookup(p, oblock, cblock, data_dir, fast_copy, NULL);
+ }
-static inline void policy_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
-{
- if (p->set_dirty)
- p->set_dirty(p, oblock);
+ return p->lookup_with_work(p, oblock, cblock, data_dir, fast_copy, work);
}
-static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
+static inline int policy_get_background_work(struct dm_cache_policy *p,
+ bool idle, struct policy_work **result)
{
- if (p->clear_dirty)
- p->clear_dirty(p, oblock);
+ return p->get_background_work(p, idle, result);
}
-static inline int policy_load_mapping(struct dm_cache_policy *p,
- dm_oblock_t oblock, dm_cblock_t cblock,
- uint32_t hint, bool hint_valid)
+static inline void policy_complete_background_work(struct dm_cache_policy *p,
+ struct policy_work *work,
+ bool success)
{
- return p->load_mapping(p, oblock, cblock, hint, hint_valid);
+ return p->complete_background_work(p, work, success);
}
-static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
- dm_cblock_t cblock)
+static inline void policy_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
- return p->get_hint ? p->get_hint(p, cblock) : 0;
+ p->set_dirty(p, cblock);
}
-static inline int policy_writeback_work(struct dm_cache_policy *p,
- dm_oblock_t *oblock,
- dm_cblock_t *cblock,
- bool critical_only)
+static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
{
- return p->writeback_work ? p->writeback_work(p, oblock, cblock, critical_only) : -ENOENT;
+ p->clear_dirty(p, cblock);
}
-static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
+static inline int policy_load_mapping(struct dm_cache_policy *p,
+ dm_oblock_t oblock, dm_cblock_t cblock,
+ bool dirty, uint32_t hint, bool hint_valid)
{
- p->remove_mapping(p, oblock);
+ return p->load_mapping(p, oblock, cblock, dirty, hint, hint_valid);
}
-static inline int policy_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
+static inline int policy_invalidate_mapping(struct dm_cache_policy *p,
+ dm_cblock_t cblock)
{
- return p->remove_cblock(p, cblock);
+ return p->invalidate_mapping(p, cblock);
}
-static inline void policy_force_mapping(struct dm_cache_policy *p,
- dm_oblock_t current_oblock, dm_oblock_t new_oblock)
+static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
+ dm_cblock_t cblock)
{
- return p->force_mapping(p, current_oblock, new_oblock);
+ return p->get_hint ? p->get_hint(p, cblock) : 0;
}
static inline dm_cblock_t policy_residency(struct dm_cache_policy *p)
@@ -107,6 +102,11 @@ static inline int policy_set_config_value(struct dm_cache_policy *p,
return p->set_config_value ? p->set_config_value(p, key, value) : -EINVAL;
}
+static inline void policy_allow_migrations(struct dm_cache_policy *p, bool allow)
+{
+ return p->allow_migrations(p, allow);
+}
+
/*----------------------------------------------------------------*/
/*
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index f19c6930a67c..74436dc2122f 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -4,8 +4,9 @@
* This file is released under the GPL.
*/
-#include "dm-cache-policy.h"
+#include "dm-cache-background-tracker.h"
#include "dm-cache-policy-internal.h"
+#include "dm-cache-policy.h"
#include "dm.h"
#include <linux/hash.h>
@@ -38,10 +39,11 @@ struct entry {
unsigned hash_next:28;
unsigned prev:28;
unsigned next:28;
- unsigned level:7;
+ unsigned level:6;
bool dirty:1;
bool allocated:1;
bool sentinel:1;
+ bool pending_work:1;
dm_oblock_t oblock;
};
@@ -279,14 +281,28 @@ static unsigned q_size(struct queue *q)
*/
static void q_push(struct queue *q, struct entry *e)
{
+ BUG_ON(e->pending_work);
+
if (!e->sentinel)
q->nr_elts++;
l_add_tail(q->es, q->qs + e->level, e);
}
+static void q_push_front(struct queue *q, struct entry *e)
+{
+ BUG_ON(e->pending_work);
+
+ if (!e->sentinel)
+ q->nr_elts++;
+
+ l_add_head(q->es, q->qs + e->level, e);
+}
+
static void q_push_before(struct queue *q, struct entry *old, struct entry *e)
{
+ BUG_ON(e->pending_work);
+
if (!e->sentinel)
q->nr_elts++;
@@ -336,19 +352,6 @@ static struct entry *q_pop(struct queue *q)
}
/*
- * Pops an entry from a level that is not past a sentinel.
- */
-static struct entry *q_pop_old(struct queue *q, unsigned max_level)
-{
- struct entry *e = q_peek(q, max_level, false);
-
- if (e)
- q_del(q, e);
-
- return e;
-}
-
-/*
* This function assumes there is a non-sentinel entry to pop. It's only
* used by redistribute, so we know this is true. It also doesn't adjust
* the q->nr_elts count.
@@ -446,45 +449,49 @@ static void q_redistribute(struct queue *q)
break;
e->level = level + 1u;
- l_add_head(q->es, l_above, e);
+ l_add_tail(q->es, l_above, e);
}
}
}
-static void q_requeue_before(struct queue *q, struct entry *dest, struct entry *e, unsigned extra_levels)
+static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels,
+ struct entry *s1, struct entry *s2)
{
struct entry *de;
- unsigned new_level;
-
- q_del(q, e);
+ unsigned sentinels_passed = 0;
+ unsigned new_level = min(q->nr_levels - 1u, e->level + extra_levels);
+ /* try and find an entry to swap with */
if (extra_levels && (e->level < q->nr_levels - 1u)) {
- new_level = min(q->nr_levels - 1u, e->level + extra_levels);
- for (de = l_head(q->es, q->qs + new_level); de; de = l_next(q->es, de)) {
- if (de->sentinel)
- continue;
+ for (de = l_head(q->es, q->qs + new_level); de && de->sentinel; de = l_next(q->es, de))
+ sentinels_passed++;
+ if (de) {
q_del(q, de);
de->level = e->level;
+ if (s1) {
+ switch (sentinels_passed) {
+ case 0:
+ q_push_before(q, s1, de);
+ break;
+
+ case 1:
+ q_push_before(q, s2, de);
+ break;
- if (dest)
- q_push_before(q, dest, de);
- else
+ default:
+ q_push(q, de);
+ }
+ } else
q_push(q, de);
- break;
}
-
- e->level = new_level;
}
+ q_del(q, e);
+ e->level = new_level;
q_push(q, e);
}
-static void q_requeue(struct queue *q, struct entry *e, unsigned extra_levels)
-{
- q_requeue_before(q, NULL, e, extra_levels);
-}
-
/*----------------------------------------------------------------*/
#define FP_SHIFT 8
@@ -550,7 +557,7 @@ static enum performance stats_assess(struct stats *s)
/*----------------------------------------------------------------*/
-struct hash_table {
+struct smq_hash_table {
struct entry_space *es;
unsigned long long hash_bits;
unsigned *buckets;
@@ -560,7 +567,7 @@ struct hash_table {
* All cache entries are stored in a chained hash table. To save space we
* use indexing again, and only store indexes to the next entry.
*/
-static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_entries)
+static int h_init(struct smq_hash_table *ht, struct entry_space *es, unsigned nr_entries)
{
unsigned i, nr_buckets;
@@ -578,34 +585,34 @@ static int h_init(struct hash_table *ht, struct entry_space *es, unsigned nr_ent
return 0;
}
-static void h_exit(struct hash_table *ht)
+static void h_exit(struct smq_hash_table *ht)
{
vfree(ht->buckets);