From 094bb5d766cfcdae47e332c6d6713c7029241be1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 21 Nov 2017 01:12:43 +0100 Subject: target-core: don't use "const char*" for a buffer that is written to iscsi_parse_pr_out_transport_id launders the const away via a call to strstr(), and then modifies the buffer (writing a nul byte) through the return value. It's cleaner to be honest and simply declare the parameter as "char*", fixing up the call chain, and allowing us to drop the cast in the return statement. Amusingly, the two current callers found it necessary to cast a non-const pointer to a const. Signed-off-by: Rasmus Villemoes Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_fabric_lib.c | 6 +++--- drivers/target/target_core_internal.h | 2 +- drivers/target/target_core_pr.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 508da345b73f..71a80257a052 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -273,7 +273,7 @@ static int iscsi_get_pr_transport_id_len( static char *iscsi_parse_pr_out_transport_id( struct se_portal_group *se_tpg, - const char *buf, + char *buf, u32 *out_tid_len, char **port_nexus_ptr) { @@ -356,7 +356,7 @@ static char *iscsi_parse_pr_out_transport_id( } } - return (char *)&buf[4]; + return &buf[4]; } int target_get_pr_transport_id_len(struct se_node_acl *nacl, @@ -405,7 +405,7 @@ int target_get_pr_transport_id(struct se_node_acl *nacl, } const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - const char *buf, u32 *out_tid_len, char **port_nexus_ptr) + char *buf, u32 *out_tid_len, char **port_nexus_ptr) { u32 offset; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 9384d19a7326..6d53d9fcb883 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -102,7 +102,7 @@ int target_get_pr_transport_id(struct se_node_acl *nacl, struct t10_pr_registration *pr_reg, int *format_code, unsigned char *buf); const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - const char *buf, u32 *out_tid_len, char **port_nexus_ptr); + char *buf, u32 *out_tid_len, char **port_nexus_ptr); /* target_core_hba.c */ struct se_hba *core_alloc_hba(const char *, u32, u32); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index b024613f9217..01ac306131c1 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1601,7 +1601,7 @@ core_scsi3_decode_spec_i_port( dest_rtpi = tmp_lun->lun_rtpi; i_str = target_parse_pr_out_transport_id(tmp_tpg, - (const char *)ptr, &tid_len, &iport_ptr); + ptr, &tid_len, &iport_ptr); if (!i_str) continue; @@ -3287,7 +3287,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, goto out; } initiator_str = target_parse_pr_out_transport_id(dest_se_tpg, - (const char *)&buf[24], &tmp_tid_len, &iport_ptr); + &buf[24], &tmp_tid_len, &iport_ptr); if (!initiator_str) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Unable to locate" " initiator_str from Transport ID\n"); -- cgit v1.2.3 From 26d2b3106f6015b1d19ae5f8b0cc1fc7fe8e669e Mon Sep 17 00:00:00 2001 From: tangwenji Date: Tue, 28 Nov 2017 12:40:27 -0600 Subject: tcmu: fix page addr in tcmu_flush_dcache_range The page addr should be update. 
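For reference, the old loop never advanced its pointer, so only the page containing vaddr was ever flushed; the corrected helper (as in the hunk below) walks the mapped range one page at a time:

static inline void tcmu_flush_dcache_range(void *vaddr, size_t size)
{
	unsigned long offset = offset_in_page(vaddr);
	void *start = vaddr - offset;		/* page-aligned start of the range */

	size = round_up(size + offset, PAGE_SIZE);

	while (size) {
		flush_dcache_page(virt_to_page(start));
		start += PAGE_SIZE;		/* advance to the next page (the missing step) */
		size -= PAGE_SIZE;
	}
}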
Signed-off-by: tangwenji Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index a415d87f22d2..6bcaa8b5684c 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -455,12 +455,13 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) static inline void tcmu_flush_dcache_range(void *vaddr, size_t size) { unsigned long offset = offset_in_page(vaddr); + void *start = vaddr - offset; size = round_up(size+offset, PAGE_SIZE); - vaddr -= offset; while (size) { - flush_dcache_page(virt_to_page(vaddr)); + flush_dcache_page(virt_to_page(start)); + start += PAGE_SIZE; size -= PAGE_SIZE; } } -- cgit v1.2.3 From bf99ec13327bb5b0f6475aea8735c0ca34cc2a26 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:28 -0600 Subject: tcmu: merge common block release code Have unmap_thread_fn use tcmu_blocks_release. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 6bcaa8b5684c..d9fd91ee8282 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1302,21 +1302,19 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) return -EINVAL; } -static void tcmu_blocks_release(struct tcmu_dev *udev) +static void tcmu_blocks_release(struct radix_tree_root *blocks, + int start, int end) { int i; struct page *page; - /* Try to release all block pages */ - mutex_lock(&udev->cmdr_lock); - for (i = 0; i <= udev->dbi_max; i++) { - page = radix_tree_delete(&udev->data_blocks, i); + for (i = start; i < end; i++) { + page = radix_tree_delete(blocks, i); if (page) { __free_page(page); atomic_dec(&global_db_count); } } - mutex_unlock(&udev->cmdr_lock); } static void tcmu_dev_kref_release(struct kref *kref) @@ -1340,7 +1338,9 @@ static void tcmu_dev_kref_release(struct kref *kref) spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); - tcmu_blocks_release(udev); + mutex_lock(&udev->cmdr_lock); + tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1); + mutex_unlock(&udev->cmdr_lock); call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); } @@ -1978,8 +1978,6 @@ static int unmap_thread_fn(void *data) struct tcmu_dev *udev; loff_t off; uint32_t start, end, block; - struct page *page; - int i; while (!kthread_should_stop()) { DEFINE_WAIT(__wait); @@ -2027,13 +2025,7 @@ static int unmap_thread_fn(void *data) unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); /* Release the block pages */ - for (i = start; i < end; i++) { - page = radix_tree_delete(&udev->data_blocks, i); - if (page) { - __free_page(page); - atomic_dec(&global_db_count); - } - } + tcmu_blocks_release(&udev->data_blocks, start, end); mutex_unlock(&udev->cmdr_lock); } -- cgit v1.2.3 From 89ec9cfd3b644fbc36047e36776509130d2fc1ec Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:29 -0600 Subject: tcmu: split unmap_thread_fn Separate unmap_thread_fn to make it easier to read. Note: this patch does not fix the bug where we might miss a wake up call. The next patch will fix that. This patch only separates the code into functions. 
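With the two helpers factored out (find_free_blocks() and run_cmdr_queues(), both introduced in the hunk below), the thread body reduces to a wait-and-dispatch loop:

static int unmap_thread_fn(void *data)
{
	while (!kthread_should_stop()) {
		DEFINE_WAIT(__wait);

		prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE);
		schedule();
		finish_wait(&unmap_wait, &__wait);

		if (kthread_should_stop())
			break;

		find_free_blocks();	/* reclaim unused data-area blocks */
		run_cmdr_queues();	/* wake udevs waiting for the global data block pool */
	}

	return 0;
}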
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 120 ++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 50 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index d9fd91ee8282..cab6c72eb012 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1973,71 +1973,91 @@ static struct target_backend_ops tcmu_ops = { .tb_dev_attrib_attrs = NULL, }; -static int unmap_thread_fn(void *data) + +static void find_free_blocks(void) { struct tcmu_dev *udev; loff_t off; uint32_t start, end, block; - while (!kthread_should_stop()) { - DEFINE_WAIT(__wait); - - prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); - schedule(); - finish_wait(&unmap_wait, &__wait); + mutex_lock(&root_udev_mutex); + list_for_each_entry(udev, &root_udev, node) { + mutex_lock(&udev->cmdr_lock); - if (kthread_should_stop()) - break; + /* Try to complete the finished commands first */ + tcmu_handle_completions(udev); - mutex_lock(&root_udev_mutex); - list_for_each_entry(udev, &root_udev, node) { - mutex_lock(&udev->cmdr_lock); + /* Skip the udevs waiting the global pool or in idle */ + if (udev->waiting_global || !udev->dbi_thresh) { + mutex_unlock(&udev->cmdr_lock); + continue; + } - /* Try to complete the finished commands first */ - tcmu_handle_completions(udev); + end = udev->dbi_max + 1; + block = find_last_bit(udev->data_bitmap, end); + if (block == udev->dbi_max) { + /* + * The last bit is dbi_max, so there is + * no need to shrink any blocks. + */ + mutex_unlock(&udev->cmdr_lock); + continue; + } else if (block == end) { + /* The current udev will goto idle state */ + udev->dbi_thresh = start = 0; + udev->dbi_max = 0; + } else { + udev->dbi_thresh = start = block + 1; + udev->dbi_max = block; + } - /* Skip the udevs waiting the global pool or in idle */ - if (udev->waiting_global || !udev->dbi_thresh) { - mutex_unlock(&udev->cmdr_lock); - continue; - } + /* Here will truncate the data area from off */ + off = udev->data_off + start * DATA_BLOCK_SIZE; + unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); - end = udev->dbi_max + 1; - block = find_last_bit(udev->data_bitmap, end); - if (block == udev->dbi_max) { - /* - * The last bit is dbi_max, so there is - * no need to shrink any blocks. - */ - mutex_unlock(&udev->cmdr_lock); - continue; - } else if (block == end) { - /* The current udev will goto idle state */ - udev->dbi_thresh = start = 0; - udev->dbi_max = 0; - } else { - udev->dbi_thresh = start = block + 1; - udev->dbi_max = block; - } + /* Release the block pages */ + tcmu_blocks_release(&udev->data_blocks, start, end); + mutex_unlock(&udev->cmdr_lock); + } + mutex_unlock(&root_udev_mutex); +} - /* Here will truncate the data area from off */ - off = udev->data_off + start * DATA_BLOCK_SIZE; - unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); +static void run_cmdr_queues(void) +{ + struct tcmu_dev *udev; - /* Release the block pages */ - tcmu_blocks_release(&udev->data_blocks, start, end); + /* + * Try to wake up the udevs who are waiting + * for the global data block pool. + */ + mutex_lock(&root_udev_mutex); + list_for_each_entry(udev, &root_udev, node) { + mutex_lock(&udev->cmdr_lock); + if (!udev->waiting_global) { mutex_unlock(&udev->cmdr_lock); + break; } + mutex_unlock(&udev->cmdr_lock); - /* - * Try to wake up the udevs who are waiting - * for the global data pool. 
- */ - list_for_each_entry(udev, &root_udev, node) { - if (udev->waiting_global) - wake_up(&udev->wait_cmdr); - } - mutex_unlock(&root_udev_mutex); + wake_up(&udev->wait_cmdr); + } + mutex_unlock(&root_udev_mutex); +} + +static int unmap_thread_fn(void *data) +{ + while (!kthread_should_stop()) { + DEFINE_WAIT(__wait); + + prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&unmap_wait, &__wait); + + if (kthread_should_stop()) + break; + + find_free_blocks(); + run_cmdr_queues(); } return 0; -- cgit v1.2.3 From 9972cebb59a653cca735178a70c8ab09a5f4de1a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:30 -0600 Subject: tcmu: fix unmap thread race If the unmap thread has already run find_free_blocks but not yet run prepare_to_wait when a wake_up(&unmap_wait) call is done, the unmap thread is going to miss the wake call. Instead of adding checks for if new waiters were added this just has us use a work queue which will run us again in this type of case. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 43 +++++++++------------------------------ 1 file changed, 10 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index cab6c72eb012..a9f5c52e8b1d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -176,12 +176,11 @@ struct tcmu_cmd { unsigned long flags; }; -static struct task_struct *unmap_thread; -static wait_queue_head_t unmap_wait; static DEFINE_MUTEX(root_udev_mutex); static LIST_HEAD(root_udev); static atomic_t global_db_count = ATOMIC_INIT(0); +static struct work_struct tcmu_unmap_work; static struct kmem_cache *tcmu_cmd_cache; @@ -389,8 +388,7 @@ static bool tcmu_get_empty_blocks(struct tcmu_dev *udev, err: udev->waiting_global = true; - /* Try to wake up the unmap thread */ - wake_up(&unmap_wait); + schedule_work(&tcmu_unmap_work); return false; } @@ -1065,8 +1063,7 @@ static void tcmu_device_timedout(struct timer_list *t) idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); spin_unlock_irqrestore(&udev->commands_lock, flags); - /* Try to wake up the ummap thread */ - wake_up(&unmap_wait); + schedule_work(&tcmu_unmap_work); /* * We don't need to wakeup threads on wait_cmdr since they have their @@ -2044,23 +2041,10 @@ static void run_cmdr_queues(void) mutex_unlock(&root_udev_mutex); } -static int unmap_thread_fn(void *data) +static void tcmu_unmap_work_fn(struct work_struct *work) { - while (!kthread_should_stop()) { - DEFINE_WAIT(__wait); - - prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); - schedule(); - finish_wait(&unmap_wait, &__wait); - - if (kthread_should_stop()) - break; - - find_free_blocks(); - run_cmdr_queues(); - } - - return 0; + find_free_blocks(); + run_cmdr_queues(); } static int __init tcmu_module_init(void) @@ -2069,6 +2053,8 @@ static int __init tcmu_module_init(void) BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); + INIT_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn); + tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache", sizeof(struct tcmu_cmd), __alignof__(struct tcmu_cmd), @@ -2114,17 +2100,8 @@ static int __init tcmu_module_init(void) if (ret) goto out_attrs; - init_waitqueue_head(&unmap_wait); - unmap_thread = kthread_run(unmap_thread_fn, NULL, "tcmu_unmap"); - if (IS_ERR(unmap_thread)) { - ret = 
PTR_ERR(unmap_thread); - goto out_unreg_transport; - } - return 0; -out_unreg_transport: - target_backend_unregister(&tcmu_ops); out_attrs: kfree(tcmu_attrs); out_unreg_genl: @@ -2139,7 +2116,7 @@ out_free_cache: static void __exit tcmu_module_exit(void) { - kthread_stop(unmap_thread); + cancel_work_sync(&tcmu_unmap_work); target_backend_unregister(&tcmu_ops); kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); -- cgit v1.2.3 From 488ebe4c355fdead39dbb3f6a51329c16cbfcc60 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:31 -0600 Subject: tcmu: move expired command completion to unmap thread This moves the expired command completion handling to the unmap wq, so the next patch can use a mutex in tcmu_check_expired_cmd. Note: tcmu_device_timedout's use of spin_lock_irq was not needed. The commands_lock is used between thread context (tcmu_queue_cmd_ring and tcmu_irqcontrol (even though this is named irqcontrol it is not run in irq context)) and timer/bh context. In the timer/bh context bhs are disabled, so you need to use the _bh lock calls from the thread context callers. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 48 +++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index a9f5c52e8b1d..2ccc8e61449b 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -143,6 +143,7 @@ struct tcmu_dev { struct timer_list timeout; unsigned int cmd_time_out; + struct list_head timedout_entry; spinlock_t nl_cmd_lock; struct tcmu_nl_cmd curr_nl_cmd; @@ -179,6 +180,9 @@ struct tcmu_cmd { static DEFINE_MUTEX(root_udev_mutex); static LIST_HEAD(root_udev); +static DEFINE_SPINLOCK(timed_out_udevs_lock); +static LIST_HEAD(timed_out_udevs); + static atomic_t global_db_count = ATOMIC_INIT(0); static struct work_struct tcmu_unmap_work; @@ -1057,18 +1061,15 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) static void tcmu_device_timedout(struct timer_list *t) { struct tcmu_dev *udev = from_timer(udev, t, timeout); - unsigned long flags; - spin_lock_irqsave(&udev->commands_lock, flags); - idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); - spin_unlock_irqrestore(&udev->commands_lock, flags); + pr_debug("%s cmd timeout has expired\n", udev->name); - schedule_work(&tcmu_unmap_work); + spin_lock(&timed_out_udevs_lock); + if (list_empty(&udev->timedout_entry)) + list_add_tail(&udev->timedout_entry, &timed_out_udevs); + spin_unlock(&timed_out_udevs_lock); - /* - * We don't need to wakeup threads on wait_cmdr since they have their - * own timeout. 
- */ + schedule_work(&tcmu_unmap_work); } static int tcmu_attach_hba(struct se_hba *hba, u32 host_id) @@ -1112,6 +1113,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) init_waitqueue_head(&udev->wait_cmdr); mutex_init(&udev->cmdr_lock); + INIT_LIST_HEAD(&udev->timedout_entry); idr_init(&udev->commands); spin_lock_init(&udev->commands_lock); @@ -1325,6 +1327,11 @@ static void tcmu_dev_kref_release(struct kref *kref) vfree(udev->mb_addr); udev->mb_addr = NULL; + spin_lock_bh(&timed_out_udevs_lock); + if (!list_empty(&udev->timedout_entry)) + list_del(&udev->timedout_entry); + spin_unlock_bh(&timed_out_udevs_lock); + /* Upper layer should drain all requests before calling this */ spin_lock_irq(&udev->commands_lock); idr_for_each_entry(&udev->commands, cmd, i) { @@ -2041,8 +2048,31 @@ static void run_cmdr_queues(void) mutex_unlock(&root_udev_mutex); } +static void check_timedout_devices(void) +{ + struct tcmu_dev *udev, *tmp_dev; + LIST_HEAD(devs); + + spin_lock_bh(&timed_out_udevs_lock); + list_splice_init(&timed_out_udevs, &devs); + + list_for_each_entry_safe(udev, tmp_dev, &devs, timedout_entry) { + list_del_init(&udev->timedout_entry); + spin_unlock_bh(&timed_out_udevs_lock); + + spin_lock(&udev->commands_lock); + idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); + spin_unlock(&udev->commands_lock); + + spin_lock_bh(&timed_out_udevs_lock); + } + + spin_unlock_bh(&timed_out_udevs_lock); +} + static void tcmu_unmap_work_fn(struct work_struct *work) { + check_timedout_devices(); find_free_blocks(); run_cmdr_queues(); } -- cgit v1.2.3 From 6fddcb775477bb2213bd76ab62145645eb570f33 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:32 -0600 Subject: tcmu: remove commands_lock No need for the commands_lock. The cmdr_lock is already held during idr addition and deletion, so just grab it during traversal. Note: This also fixes a issue where we should have been using at least _bh locking in tcmu_handle_completions when taking the commands lock to prevent the case where tcmu_handle_completions could be interrupted by a timer softirq while the commands_lock is held. 
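Put differently: every idr_alloc()/idr_remove() on udev->commands already runs with udev->cmdr_lock held, and after the previous patch the timeout scan runs from the unmap workqueue (thread context), so traversals can simply take the same mutex, as in the hunk below:

	mutex_lock(&udev->cmdr_lock);
	idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
	mutex_unlock(&udev->cmdr_lock);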
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2ccc8e61449b..43583a792439 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -139,7 +139,6 @@ struct tcmu_dev { struct radix_tree_root data_blocks; struct idr commands; - spinlock_t commands_lock; struct timer_list timeout; unsigned int cmd_time_out; @@ -1014,10 +1013,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) } WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD); - spin_lock(&udev->commands_lock); cmd = idr_remove(&udev->commands, entry->hdr.cmd_id); - spin_unlock(&udev->commands_lock); - if (!cmd) { pr_err("cmd_id not found, ring is broken\n"); set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags); @@ -1115,7 +1111,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&udev->timedout_entry); idr_init(&udev->commands); - spin_lock_init(&udev->commands_lock); timer_setup(&udev->timeout, tcmu_device_timedout, 0); @@ -1333,16 +1328,14 @@ static void tcmu_dev_kref_release(struct kref *kref) spin_unlock_bh(&timed_out_udevs_lock); /* Upper layer should drain all requests before calling this */ - spin_lock_irq(&udev->commands_lock); + mutex_lock(&udev->cmdr_lock); idr_for_each_entry(&udev->commands, cmd, i) { if (tcmu_check_and_free_pending_cmd(cmd) != 0) all_expired = false; } idr_destroy(&udev->commands); - spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); - mutex_lock(&udev->cmdr_lock); tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1); mutex_unlock(&udev->cmdr_lock); @@ -2060,9 +2053,9 @@ static void check_timedout_devices(void) list_del_init(&udev->timedout_entry); spin_unlock_bh(&timed_out_udevs_lock); - spin_lock(&udev->commands_lock); + mutex_lock(&udev->cmdr_lock); idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); - spin_unlock(&udev->commands_lock); + mutex_unlock(&udev->cmdr_lock); spin_lock_bh(&timed_out_udevs_lock); } -- cgit v1.2.3 From 810b8153c4243d2012a6ec002ddd3bbc9a9ae8c2 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:33 -0600 Subject: tcmu: release blocks for partially setup cmds If we cannot setup a cmd because we run out of ring space or global pages release the blocks before sleeping. This prevents a deadlock where dev0 has waiting_blocks set and needs N blocks, but dev1 to devX have each allocated N / X blocks and also hit the global block limit so they went to sleep. find_free_blocks is not able to take the sleeping dev's blocks becaause their waiting_blocks is set and even if it was not the block returned by find_last_bit could equal dbi_max. The latter will probably never happen because DATA_BLOCK_BITS is so high but in the next patches DATA_BLOCK_BITS and TCMU_GLOBAL_MAX_BLOCKS will be settable so it might be lower and could happen. 
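For context, the two added calls sit at the top of the ring-space wait loop in tcmu_queue_cmd_ring(), so a sleeping submitter no longer pins the data blocks it had already grabbed (added lines as in the hunk below; the rest of the loop is elided):

	while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) {
		DEFINE_WAIT(__wait);

		/*
		 * Don't leave commands partially setup because the unmap
		 * thread might need the blocks to make forward progress.
		 */
		tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur);
		tcmu_cmd_reset_dbi_cur(tcmu_cmd);

		prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
		pr_debug("sleeping for ring space\n");
		mutex_unlock(&udev->cmdr_lock);
		/* ... schedule_timeout(), finish_wait(), retake cmdr_lock ... */
	}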
Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 43583a792439..c7541f090453 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -807,6 +807,13 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) int ret; DEFINE_WAIT(__wait); + /* + * Don't leave commands partially setup because the unmap + * thread might need the blocks to make forward progress. + */ + tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur); + tcmu_cmd_reset_dbi_cur(tcmu_cmd); + prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE); pr_debug("sleeping for ring space\n"); -- cgit v1.2.3 From 1a1fc0b88e9019cb3b2f291bdcb2d03d38614690 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:34 -0600 Subject: tcmu: simplify scatter_data_area error handling scatter_data_area always returns 0, so stop checking for errors. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c7541f090453..965f462eaa22 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -520,7 +520,7 @@ static inline size_t iov_tail(struct iovec *iov) return (size_t)iov->iov_base + iov->iov_len; } -static int scatter_data_area(struct tcmu_dev *udev, +static void scatter_data_area(struct tcmu_dev *udev, struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg, unsigned int data_nents, struct iovec **iov, int *iov_cnt, bool copy_data) @@ -573,8 +573,6 @@ static int scatter_data_area(struct tcmu_dev *udev, } if (to) kunmap_atomic(to); - - return 0; } static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, @@ -864,33 +862,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) iov_cnt = 0; copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE || se_cmd->se_cmd_flags & SCF_BIDI); - ret = scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg, - se_cmd->t_data_nents, &iov, &iov_cnt, - copy_to_data_area); - if (ret) { - tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); - mutex_unlock(&udev->cmdr_lock); - - pr_err("tcmu: alloc and scatter data failed\n"); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } + scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg, + se_cmd->t_data_nents, &iov, &iov_cnt, + copy_to_data_area); entry->req.iov_cnt = iov_cnt; /* Handle BIDI commands */ iov_cnt = 0; if (se_cmd->se_cmd_flags & SCF_BIDI) { iov++; - ret = scatter_data_area(udev, tcmu_cmd, - se_cmd->t_bidi_data_sg, - se_cmd->t_bidi_data_nents, - &iov, &iov_cnt, false); - if (ret) { - tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); - mutex_unlock(&udev->cmdr_lock); - - pr_err("tcmu: alloc and scatter bidi data failed\n"); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } + scatter_data_area(udev, tcmu_cmd, se_cmd->t_bidi_data_sg, + se_cmd->t_bidi_data_nents, &iov, &iov_cnt, + false); } entry->req.iov_bidi_cnt = iov_cnt; -- cgit v1.2.3 From 3c0f26ff9d040c6193b33689bbc03103854dba4d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:35 -0600 Subject: tcmu: fix free block calculation The blocks_left calculation does not account for free blocks between 0 and thresh, so we could be queueing/waiting when there are 
enough blocks free. This has us add in the blocks between 0 and thresh as well as at the end from thresh to DATA_BLOCK_BITS. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 965f462eaa22..5d1daea51079 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -637,7 +637,7 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh) { - return DATA_BLOCK_SIZE * (thresh - bitmap_weight(bitmap, thresh)); + return thresh - bitmap_weight(bitmap, thresh); } /* @@ -677,8 +677,9 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, /* try to check and get the data blocks as needed */ space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh); - if (space < data_needed) { - unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh; + if ((space * DATA_BLOCK_SIZE) < data_needed) { + unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh + + space; unsigned long grow; if (blocks_left < blocks_needed) { -- cgit v1.2.3 From 3e60913579b2fefa74eeb3269426e864f4afa7e7 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 28 Nov 2017 12:40:36 -0600 Subject: tcmu: clean up the scatter helper Add some comments to make the scatter code to be more readable, and drop unused arg to new_iov. Signed-off-by: Xiubo Li Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 5d1daea51079..8d0dc471fce8 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -492,8 +492,7 @@ static inline size_t head_to_end(size_t head, size_t size) return size - head; } -static inline void new_iov(struct iovec **iov, int *iov_cnt, - struct tcmu_dev *udev) +static inline void new_iov(struct iovec **iov, int *iov_cnt) { struct iovec *iovec; @@ -546,19 +545,38 @@ static void scatter_data_area(struct tcmu_dev *udev, to = kmap_atomic(page); } - copy_bytes = min_t(size_t, sg_remaining, - block_remaining); + /* + * Covert to virtual offset of the ring data area. + */ to_offset = get_block_offset_user(udev, dbi, block_remaining); + /* + * The following code will gather and map the blocks + * to the same iovec when the blocks are all next to + * each other. + */ + copy_bytes = min_t(size_t, sg_remaining, + block_remaining); if (*iov_cnt != 0 && to_offset == iov_tail(*iov)) { + /* + * Will append to the current iovec, because + * the current block page is next to the + * previous one. + */ (*iov)->iov_len += copy_bytes; } else { - new_iov(iov, iov_cnt, udev); + /* + * Will allocate a new iovec because we are + * first time here or the current block page + * is not next to the previous one. 
+ */ + new_iov(iov, iov_cnt); (*iov)->iov_base = (void __user *)to_offset; (*iov)->iov_len = copy_bytes; } + if (copy_data) { offset = DATA_BLOCK_SIZE - block_remaining; memcpy(to + offset, @@ -566,11 +584,13 @@ static void scatter_data_area(struct tcmu_dev *udev, copy_bytes); tcmu_flush_dcache_range(to, copy_bytes); } + sg_remaining -= copy_bytes; block_remaining -= copy_bytes; } kunmap_atomic(from - sg->offset); } + if (to) kunmap_atomic(to); } -- cgit v1.2.3 From 6fd0ce79724dabe2cd0bd8aed111cbe94755bf88 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:37 -0600 Subject: tcmu: prep queue_cmd_ring to be used by unmap wq In the next patches we will call queue_cmd_ring from the submitting context and also the completion path. This changes the queue_cmd_ring return code so in the next patches we can return a sense_reason_t and also signal if a command was requeued. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 42 +++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 8d0dc471fce8..68d1d7214eeb 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -776,8 +776,16 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) return 0; } -static sense_reason_t -tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) +/** + * queue_cmd_ring - queue cmd to ring or internally + * @tcmu_cmd: cmd to queue + * @scsi_err: TCM error code if failure (-1) returned. + * + * Returns: + * -1 we cannot queue internally or to the ring. + * 0 success + */ +static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; struct se_cmd *se_cmd = tcmu_cmd->se_cmd; @@ -791,8 +799,12 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) bool copy_to_data_area; size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); - if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + *scsi_err = TCM_NO_SENSE; + + if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) { + *scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + return -1; + } /* * Must be a certain minimum size for response sense info, but @@ -819,7 +831,8 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) "cmd ring/data area\n", command_size, data_length, udev->cmdr_size, udev->data_size); mutex_unlock(&udev->cmdr_lock); - return TCM_INVALID_CDB_FIELD; + *scsi_err = TCM_INVALID_CDB_FIELD; + return -1; } while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) { @@ -845,7 +858,8 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) finish_wait(&udev->wait_cmdr, &__wait); if (!ret) { pr_warn("tcmu: command timed out\n"); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + *scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + return -1; } mutex_lock(&udev->cmdr_lock); @@ -902,7 +916,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) if (ret) { tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); mutex_unlock(&udev->cmdr_lock); - return TCM_OUT_OF_RESOURCES; + + *scsi_err = TCM_OUT_OF_RESOURCES; + return -1; } entry->hdr.cmd_id = tcmu_cmd->cmd_id; @@ -933,27 +949,23 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) mod_timer(&udev->timeout, round_jiffies_up(jiffies + msecs_to_jiffies(udev->cmd_time_out))); - return TCM_NO_SENSE; + return 0; } static sense_reason_t tcmu_queue_cmd(struct se_cmd *se_cmd) { struct tcmu_cmd 
*tcmu_cmd; - sense_reason_t ret; + sense_reason_t scsi_ret; tcmu_cmd = tcmu_alloc_cmd(se_cmd); if (!tcmu_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - ret = tcmu_queue_cmd_ring(tcmu_cmd); - if (ret != TCM_NO_SENSE) { - pr_err("TCMU: Could not queue command\n"); - + if (queue_cmd_ring(tcmu_cmd, &scsi_ret) < 0) tcmu_free_cmd(tcmu_cmd); - } - return ret; + return scsi_ret; } static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry) -- cgit v1.2.3 From f890f5799a6628fe006ae524e625900186074cdb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:38 -0600 Subject: tcmu: simplify dbi thresh handling We do not really save a lot by trying to increase thresh a multiple of the existing value. This just simplifies the code by increasing it to whatever is needed for the command being executed. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 68d1d7214eeb..2679e4dcd0f1 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -79,7 +79,6 @@ #define DATA_BLOCK_SIZE PAGE_SIZE #define DATA_BLOCK_BITS (256 * 1024) #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE) -#define DATA_BLOCK_INIT_BITS 128 /* The total size of the ring is 8M + 256K * PAGE_SIZE */ #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) @@ -700,7 +699,6 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, if ((space * DATA_BLOCK_SIZE) < data_needed) { unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh + space; - unsigned long grow; if (blocks_left < blocks_needed) { pr_debug("no data space: only %lu available, but ask for %zu\n", @@ -709,23 +707,9 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, return false; } - /* Try to expand the thresh */ - if (!udev->dbi_thresh) { - /* From idle state */ - uint32_t init_thresh = DATA_BLOCK_INIT_BITS; - - udev->dbi_thresh = max(blocks_needed, init_thresh); - } else { - /* - * Grow the data area by max(blocks needed, - * dbi_thresh / 2), but limited to the max - * DATA_BLOCK_BITS size. - */ - grow = max(blocks_needed, udev->dbi_thresh / 2); - udev->dbi_thresh += grow; - if (udev->dbi_thresh > DATA_BLOCK_BITS) - udev->dbi_thresh = DATA_BLOCK_BITS; - } + udev->dbi_thresh += blocks_needed; + if (udev->dbi_thresh > DATA_BLOCK_BITS) + udev->dbi_thresh = DATA_BLOCK_BITS; } return tcmu_get_empty_blocks(udev, cmd); -- cgit v1.2.3 From af1dd7ff46824a94da1d90443bd07db2796bd545 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:39 -0600 Subject: tcmu: don't block submitting context for block waits This patch has tcmu internally queue cmds if its ring buffer is full. It also makes the TCMU_GLOBAL_MAX_BLOCKS limit a hint instead of a hard limit, so we do not have to add any new locks/atomics in the main IO path except when IO is not running. This fixes the following bugs: 1. We cannot sleep from the submitting context because it might be called from a target recv context. This results in transport level commands timing out. For example if the ring is full, we would sleep, and a iscsi initiator would send a iscsi ping/nop which times out because the target's recv thread is sleeping here. 2. 
Devices were not fairly scheduled to run when they hit the global limit so they could time out waiting for ring space while others got run. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 264 ++++++++++++++++++++++++-------------- 1 file changed, 169 insertions(+), 95 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2679e4dcd0f1..52fc1d440d23 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -83,7 +83,10 @@ /* The total size of the ring is 8M + 256K * PAGE_SIZE */ #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) -/* Default maximum of the global data blocks(512K * PAGE_SIZE) */ +/* + * Default number of global data blocks(512K * PAGE_SIZE) + * when the unmap thread will be started. + */ #define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024) static u8 tcmu_kern_cmd_reply_supported; @@ -106,6 +109,7 @@ struct tcmu_nl_cmd { struct tcmu_dev { struct list_head node; struct kref kref; + struct se_device se_dev; char *name; @@ -128,10 +132,9 @@ struct tcmu_dev { size_t data_off; size_t data_size; - wait_queue_head_t wait_cmdr; struct mutex cmdr_lock; + struct list_head cmdr_queue; - bool waiting_global; uint32_t dbi_max; uint32_t dbi_thresh; DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS); @@ -160,6 +163,7 @@ struct tcmu_dev { struct tcmu_cmd { struct se_cmd *se_cmd; struct tcmu_dev *tcmu_dev; + struct list_head cmdr_queue_entry; uint16_t cmd_id; @@ -174,7 +178,16 @@ struct tcmu_cmd { #define TCMU_CMD_BIT_EXPIRED 0 unsigned long flags; }; - +/* + * To avoid dead lock the mutex lock order should always be: + * + * mutex_lock(&root_udev_mutex); + * ... + * mutex_lock(&tcmu_dev->cmdr_lock); + * mutex_unlock(&tcmu_dev->cmdr_lock); + * ... + * mutex_unlock(&root_udev_mutex); + */ static DEFINE_MUTEX(root_udev_mutex); static LIST_HEAD(root_udev); @@ -182,7 +195,7 @@ static DEFINE_SPINLOCK(timed_out_udevs_lock); static LIST_HEAD(timed_out_udevs); static atomic_t global_db_count = ATOMIC_INIT(0); -static struct work_struct tcmu_unmap_work; +static struct delayed_work tcmu_unmap_work; static struct kmem_cache *tcmu_cmd_cache; @@ -346,10 +359,8 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev, page = radix_tree_lookup(&udev->data_blocks, dbi); if (!page) { if (atomic_add_return(1, &global_db_count) > - TCMU_GLOBAL_MAX_BLOCKS) { - atomic_dec(&global_db_count); - return false; - } + TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, 0); /* try to get new page from the mm */ page = alloc_page(GFP_KERNEL); @@ -380,18 +391,11 @@ static bool tcmu_get_empty_blocks(struct tcmu_dev *udev, { int i; - udev->waiting_global = false; - for (i = tcmu_cmd->dbi_cur; i < tcmu_cmd->dbi_cnt; i++) { if (!tcmu_get_empty_block(udev, tcmu_cmd)) - goto err; + return false; } return true; - -err: - udev->waiting_global = true; - schedule_work(&tcmu_unmap_work); - return false; } static inline struct page * @@ -437,6 +441,7 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) if (!tcmu_cmd) return NULL; + INIT_LIST_HEAD(&tcmu_cmd->cmdr_queue_entry); tcmu_cmd->se_cmd = se_cmd; tcmu_cmd->tcmu_dev = udev; @@ -742,6 +747,10 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) unsigned long tmo = udev->cmd_time_out; int cmd_id; + /* + * If it was on the cmdr queue waiting we do not reset the timer + * for requeues and when it is finally sent to userspace. 
+ */ if (tcmu_cmd->cmd_id) return 0; @@ -753,13 +762,31 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) tcmu_cmd->cmd_id = cmd_id; if (!tmo) - return 0; + tmo = TCMU_TIME_OUT; + + pr_debug("allocated cmd %u for dev %s tmo %lu\n", tcmu_cmd->cmd_id, + udev->name, tmo / MSEC_PER_SEC); tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo)); mod_timer(&udev->timeout, tcmu_cmd->deadline); return 0; } +static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd) +{ + struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; + int ret; + + ret = tcmu_setup_cmd_timer(tcmu_cmd); + if (ret) + return ret; + + list_add_tail(&tcmu_cmd->cmdr_queue_entry, &udev->cmdr_queue); + pr_debug("adding cmd %u on dev %s to ring space wait queue\n", + tcmu_cmd->cmd_id, udev->name); + return 0; +} + /** * queue_cmd_ring - queue cmd to ring or internally * @tcmu_cmd: cmd to queue @@ -768,6 +795,7 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) * Returns: * -1 we cannot queue internally or to the ring. * 0 success + * 1 internally queued to wait for ring memory to free. */ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) { @@ -805,7 +833,8 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) base_command_size = tcmu_cmd_get_base_cmd_size(tcmu_cmd->dbi_cnt); command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size); - mutex_lock(&udev->cmdr_lock); + if (!list_empty(&udev->cmdr_queue)) + goto queue; mb = udev->mb_addr; cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ @@ -814,42 +843,18 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu " "cmd ring/data area\n", command_size, data_length, udev->cmdr_size, udev->data_size); - mutex_unlock(&udev->cmdr_lock); *scsi_err = TCM_INVALID_CDB_FIELD; return -1; } - while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) { - int ret; - DEFINE_WAIT(__wait); - + if (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) { /* * Don't leave commands partially setup because the unmap * thread might need the blocks to make forward progress. */ tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur); tcmu_cmd_reset_dbi_cur(tcmu_cmd); - - prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE); - - pr_debug("sleeping for ring space\n"); - mutex_unlock(&udev->cmdr_lock); - if (udev->cmd_time_out) - ret = schedule_timeout( - msecs_to_jiffies(udev->cmd_time_out)); - else - ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); - finish_wait(&udev->wait_cmdr, &__wait); - if (!ret) { - pr_warn("tcmu: command timed out\n"); - *scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return -1; - } - - mutex_lock(&udev->cmdr_lock); - - /* We dropped cmdr_lock, cmd_head is stale */ - cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ + goto queue; } /* Insert a PAD if end-of-ring space is too small */ @@ -924,31 +929,39 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size); tcmu_flush_dcache_range(mb, sizeof(*mb)); - mutex_unlock(&udev->cmdr_lock); /* TODO: only if FLUSH and FUA? 
*/ uio_event_notify(&udev->uio_info); - if (udev->cmd_time_out) - mod_timer(&udev->timeout, round_jiffies_up(jiffies + - msecs_to_jiffies(udev->cmd_time_out))); - return 0; + +queue: + if (add_to_cmdr_queue(tcmu_cmd)) { + *scsi_err = TCM_OUT_OF_RESOURCES; + return -1; + } + + return 1; } static sense_reason_t tcmu_queue_cmd(struct se_cmd *se_cmd) { + struct se_device *se_dev = se_cmd->se_dev; + struct tcmu_dev *udev = TCMU_DEV(se_dev); struct tcmu_cmd *tcmu_cmd; sense_reason_t scsi_ret; + int ret; tcmu_cmd = tcmu_alloc_cmd(se_cmd); if (!tcmu_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - if (queue_cmd_ring(tcmu_cmd, &scsi_ret) < 0) + mutex_lock(&udev->cmdr_lock); + ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); + mutex_unlock(&udev->cmdr_lock); + if (ret < 0) tcmu_free_cmd(tcmu_cmd); - return scsi_ret; } @@ -1036,10 +1049,15 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) handled++; } - if (mb->cmd_tail == mb->cmd_head) - del_timer(&udev->timeout); /* no more pending cmds */ - - wake_up(&udev->wait_cmdr); + if (mb->cmd_tail == mb->cmd_head && list_empty(&udev->cmdr_queue)) { + del_timer(&udev->timeout); + /* + * not more pending or waiting commands so try to reclaim + * blocks if needed. + */ + if (atomic_read(&global_db_count) > TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, 0); + } return handled; } @@ -1047,6 +1065,10 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) static int tcmu_check_expired_cmd(int id, void *p, void *data) { struct tcmu_cmd *cmd = p; + struct tcmu_dev *udev = cmd->tcmu_dev; + u8 scsi_status; + struct se_cmd *se_cmd; + bool is_running; if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) return 0; @@ -1054,10 +1076,27 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) if (!time_after(jiffies, cmd->deadline)) return 0; - set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); - target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION); + is_running = list_empty(&cmd->cmdr_queue_entry); + pr_debug("Timing out cmd %u on dev %s that is %s.\n", + id, udev->name, is_running ? 
"inflight" : "queued"); + + se_cmd = cmd->se_cmd; cmd->se_cmd = NULL; + if (is_running) { + set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); + /* + * target_complete_cmd will translate this to LUN COMM FAILURE + */ + scsi_status = SAM_STAT_CHECK_CONDITION; + } else { + list_del_init(&cmd->cmdr_queue_entry); + + idr_remove(&udev->commands, id); + tcmu_free_cmd(cmd); + scsi_status = SAM_STAT_TASK_SET_FULL; + } + target_complete_cmd(se_cmd, scsi_status); return 0; } @@ -1072,7 +1111,7 @@ static void tcmu_device_timedout(struct timer_list *t) list_add_tail(&udev->timedout_entry, &timed_out_udevs); spin_unlock(&timed_out_udevs_lock); - schedule_work(&tcmu_unmap_work); + schedule_delayed_work(&tcmu_unmap_work, 0); } static int tcmu_attach_hba(struct se_hba *hba, u32 host_id) @@ -1113,10 +1152,10 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev->hba = hba; udev->cmd_time_out = TCMU_TIME_OUT; - init_waitqueue_head(&udev->wait_cmdr); mutex_init(&udev->cmdr_lock); INIT_LIST_HEAD(&udev->timedout_entry); + INIT_LIST_HEAD(&udev->cmdr_queue); idr_init(&udev->commands); timer_setup(&udev->timeout, tcmu_device_timedout, 0); @@ -1129,13 +1168,63 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) return &udev->se_dev; } +static bool run_cmdr_queue(struct tcmu_dev *udev) +{ + struct tcmu_cmd *tcmu_cmd, *tmp_cmd; + LIST_HEAD(cmds); + bool drained = true; + sense_reason_t scsi_ret; + int ret; + + if (list_empty(&udev->cmdr_queue)) + return true; + + pr_debug("running %s's cmdr queue\n", udev->name); + + list_splice_init(&udev->cmdr_queue, &cmds); + + list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, cmdr_queue_entry) { + list_del_init(&tcmu_cmd->cmdr_queue_entry); + + pr_debug("removing cmd %u on dev %s from queue\n", + tcmu_cmd->cmd_id, udev->name); + + ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); + if (ret < 0) { + pr_debug("cmd %u on dev %s failed with %u\n", + tcmu_cmd->cmd_id, udev->name, scsi_ret); + + idr_remove(&udev->commands, tcmu_cmd->cmd_id); + /* + * Ignore scsi_ret for now. target_complete_cmd + * drops it. 
+ */ + target_complete_cmd(tcmu_cmd->se_cmd, + SAM_STAT_CHECK_CONDITION); + tcmu_free_cmd(tcmu_cmd); + } else if (ret > 0) { + pr_debug("ran out of space during cmdr queue run\n"); + /* + * cmd was requeued, so just put all cmds back in + * the queue + */ + list_splice_tail(&cmds, &udev->cmdr_queue); + drained = false; + goto done; + } + } +done: + return drained; +} + static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on) { - struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info); + struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); - mutex_lock(&tcmu_dev->cmdr_lock); - tcmu_handle_completions(tcmu_dev); - mutex_unlock(&tcmu_dev->cmdr_lock); + mutex_lock(&udev->cmdr_lock); + tcmu_handle_completions(udev); + run_cmdr_queue(udev); + mutex_unlock(&udev->cmdr_lock); return 0; } @@ -1531,7 +1620,6 @@ static int tcmu_configure_device(struct se_device *dev) udev->data_off = CMDR_SIZE; udev->data_size = DATA_SIZE; udev->dbi_thresh = 0; /* Default in Idle state */ - udev->waiting_global = false; /* Initialise the mailbox of the ring buffer */ mb = udev->mb_addr; @@ -1977,12 +2065,14 @@ static struct target_backend_ops tcmu_ops = { .tb_dev_attrib_attrs = NULL, }; - static void find_free_blocks(void) { struct tcmu_dev *udev; loff_t off; - uint32_t start, end, block; + u32 start, end, block, total_freed = 0; + + if (atomic_read(&global_db_count) <= TCMU_GLOBAL_MAX_BLOCKS) + return; mutex_lock(&root_udev_mutex); list_for_each_entry(udev, &root_udev, node) { @@ -1991,8 +2081,8 @@ static void find_free_blocks(void) /* Try to complete the finished commands first */ tcmu_handle_completions(udev); - /* Skip the udevs waiting the global pool or in idle */ - if (udev->waiting_global || !udev->dbi_thresh) { + /* Skip the udevs in idle */ + if (!udev->dbi_thresh) { mutex_unlock(&udev->cmdr_lock); continue; } @@ -2001,8 +2091,8 @@ static void find_free_blocks(void) block = find_last_bit(udev->data_bitmap, end); if (block == udev->dbi_max) { /* - * The last bit is dbi_max, so there is - * no need to shrink any blocks. + * The last bit is dbi_max, so it is not possible + * reclaim any blocks. */ mutex_unlock(&udev->cmdr_lock); continue; @@ -2022,30 +2112,15 @@ static void find_free_blocks(void) /* Release the block pages */ tcmu_blocks_release(&udev->data_blocks, start, end); mutex_unlock(&udev->cmdr_lock); - } - mutex_unlock(&root_udev_mutex); -} -static void run_cmdr_queues(void) -{ - struct tcmu_dev *udev; - - /* - * Try to wake up the udevs who are waiting - * for the global data block pool. 
- */ - mutex_lock(&root_udev_mutex); - list_for_each_entry(udev, &root_udev, node) { - mutex_lock(&udev->cmdr_lock); - if (!udev->waiting_global) { - mutex_unlock(&udev->cmdr_lock); - break; - } - mutex_unlock(&udev->cmdr_lock); - - wake_up(&udev->wait_cmdr); + total_freed += end - start; + pr_debug("Freed %u blocks (total %u) from %s.\n", end - start, + total_freed, udev->name); } mutex_unlock(&root_udev_mutex); + + if (atomic_read(&global_db_count) > TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, msecs_to_jiffies(5000)); } static void check_timedout_devices(void) @@ -2074,7 +2149,6 @@ static void tcmu_unmap_work_fn(struct work_struct *work) { check_timedout_devices(); find_free_blocks(); - run_cmdr_queues(); } static int __init tcmu_module_init(void) @@ -2083,7 +2157,7 @@ static int __init tcmu_module_init(void) BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); - INIT_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn); + INIT_DELAYED_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn); tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache", sizeof(struct tcmu_cmd), @@ -2146,7 +2220,7 @@ out_free_cache: static void __exit tcmu_module_exit(void) { - cancel_work_sync(&tcmu_unmap_work); + cancel_delayed_work_sync(&tcmu_unmap_work); target_backend_unregister(&tcmu_ops); kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); -- cgit v1.2.3 From 9103575ae34e9d60d40940bebf47fc9e9652067a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:40 -0600 Subject: tcmu: make ring buffer timer configurable This adds a timer, qfull_time_out, that controls how long a device will wait for ring buffer space to open before failing the commands in the queue. It is useful to separate this timer from the cmd_time_out and default 30 sec one, because for HA setups cmd_time_out may be disbled and 30 seconds is too long to wait when some OSs like ESX will timeout commands after as little as 8 - 15 seconds. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 149 +++++++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 52fc1d440d23..c6a0c3198ccc 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -142,8 +142,12 @@ struct tcmu_dev { struct idr commands; - struct timer_list timeout; + struct timer_list cmd_timer; unsigned int cmd_time_out; + + struct timer_list qfull_timer; + int qfull_time_out; + struct list_head timedout_entry; spinlock_t nl_cmd_lock; @@ -741,18 +745,14 @@ static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd, return command_size; } -static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) +static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd, unsigned int tmo, + struct timer_list *timer) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; - unsigned long tmo = udev->cmd_time_out; int cmd_id; - /* - * If it was on the cmdr queue waiting we do not reset the timer - * for requeues and when it is finally sent to userspace. 
- */ if (tcmu_cmd->cmd_id) - return 0; + goto setup_timer; cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 1, USHRT_MAX, GFP_NOWAIT); if (cmd_id < 0) { @@ -761,23 +761,38 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) } tcmu_cmd->cmd_id = cmd_id; - if (!tmo) - tmo = TCMU_TIME_OUT; - pr_debug("allocated cmd %u for dev %s tmo %lu\n", tcmu_cmd->cmd_id, udev->name, tmo / MSEC_PER_SEC); +setup_timer: + if (!tmo) + return 0; + tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo)); - mod_timer(&udev->timeout, tcmu_cmd->deadline); + mod_timer(timer, tcmu_cmd->deadline); return 0; } static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; + unsigned int tmo; int ret; - ret = tcmu_setup_cmd_timer(tcmu_cmd); + /* + * For backwards compat if qfull_time_out is not set use + * cmd_time_out and if that's not set use the default time out. + */ + if (!udev->qfull_time_out) + return -ETIMEDOUT; + else if (udev->qfull_time_out > 0) + tmo = udev->qfull_time_out; + else if (udev->cmd_time_out) + tmo = udev->cmd_time_out; + else + tmo = TCMU_TIME_OUT; + + ret = tcmu_setup_cmd_timer(tcmu_cmd, tmo, &udev->qfull_timer); if (ret) return ret; @@ -901,7 +916,8 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) } entry->req.iov_bidi_cnt = iov_cnt; - ret = tcmu_setup_cmd_timer(tcmu_cmd); + ret = tcmu_setup_cmd_timer(tcmu_cmd, udev->cmd_time_out, + &udev->cmd_timer); if (ret) { tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); mutex_unlock(&udev->cmdr_lock); @@ -1049,14 +1065,19 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) handled++; } - if (mb->cmd_tail == mb->cmd_head && list_empty(&udev->cmdr_queue)) { - del_timer(&udev->timeout); - /* - * not more pending or waiting commands so try to reclaim - * blocks if needed. - */ - if (atomic_read(&global_db_count) > TCMU_GLOBAL_MAX_BLOCKS) - schedule_delayed_work(&tcmu_unmap_work, 0); + if (mb->cmd_tail == mb->cmd_head) { + /* no more pending commands */ + del_timer(&udev->cmd_timer); + + if (list_empty(&udev->cmdr_queue)) { + /* + * no more pending or waiting commands so try to + * reclaim blocks if needed. + */ + if (atomic_read(&global_db_count) > + TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, 0); + } } return handled; @@ -1077,13 +1098,15 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) return 0; is_running = list_empty(&cmd->cmdr_queue_entry); - pr_debug("Timing out cmd %u on dev %s that is %s.\n", - id, udev->name, is_running ? "inflight" : "queued"); - - se_cmd = cmd->se_cmd; - cmd->se_cmd = NULL; if (is_running) { + /* + * If cmd_time_out is disabled but qfull is set deadline + * will only reflect the qfull timeout. Ignore it. + */ + if (!udev->cmd_time_out) + return 0; + set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); /* * target_complete_cmd will translate this to LUN COMM FAILURE @@ -1096,16 +1119,18 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) tcmu_free_cmd(cmd); scsi_status = SAM_STAT_TASK_SET_FULL; } + + pr_debug("Timing out cmd %u on dev %s that is %s.\n", + id, udev->name, is_running ? 
"inflight" : "queued"); + + se_cmd = cmd->se_cmd; + cmd->se_cmd = NULL; target_complete_cmd(se_cmd, scsi_status); return 0; } -static void tcmu_device_timedout(struct timer_list *t) +static void tcmu_device_timedout(struct tcmu_dev *udev) { - struct tcmu_dev *udev = from_timer(udev, t, timeout); - - pr_debug("%s cmd timeout has expired\n", udev->name); - spin_lock(&timed_out_udevs_lock); if (list_empty(&udev->timedout_entry)) list_add_tail(&udev->timedout_entry, &timed_out_udevs); @@ -1114,6 +1139,22 @@ static void tcmu_device_timedout(struct timer_list *t) schedule_delayed_work(&tcmu_unmap_work, 0); } +static void tcmu_cmd_timedout(struct timer_list *t) +{ + struct tcmu_dev *udev = from_timer(udev, t, cmd_timer); + + pr_debug("%s cmd timeout has expired\n", udev->name); + tcmu_device_timedout(udev); +} + +static void tcmu_qfull_timedout(struct timer_list *t) +{ + struct tcmu_dev *udev = from_timer(udev, t, qfull_timer); + + pr_debug("%s qfull timeout has expired\n", udev->name); + tcmu_device_timedout(udev); +} + static int tcmu_attach_hba(struct se_hba *hba, u32 host_id) { struct tcmu_hba *tcmu_hba; @@ -1151,6 +1192,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev->hba = hba; udev->cmd_time_out = TCMU_TIME_OUT; + udev->qfull_time_out = -1; mutex_init(&udev->cmdr_lock); @@ -1158,7 +1200,8 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&udev->cmdr_queue); idr_init(&udev->commands); - timer_setup(&udev->timeout, tcmu_device_timedout, 0); + timer_setup(&udev->qfull_timer, tcmu_qfull_timedout, 0); + timer_setup(&udev->cmd_timer, tcmu_cmd_timedout, 0); init_waitqueue_head(&udev->nl_cmd_wq); spin_lock_init(&udev->nl_cmd_lock); @@ -1213,6 +1256,8 @@ static bool run_cmdr_queue(struct tcmu_dev *udev) goto done; } } + if (list_empty(&udev->cmdr_queue)) + del_timer(&udev->qfull_timer); done: return drained; } @@ -1712,7 +1757,8 @@ static void tcmu_destroy_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); - del_timer_sync(&udev->timeout); + del_timer_sync(&udev->cmd_timer); + del_timer_sync(&udev->qfull_timer); mutex_lock(&root_udev_mutex); list_del(&udev->node); @@ -1893,6 +1939,40 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag } CONFIGFS_ATTR(tcmu_, cmd_time_out); +static ssize_t tcmu_qfull_time_out_show(struct config_item *item, char *page) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = TCMU_DEV(da->da_dev); + + return snprintf(page, PAGE_SIZE, "%ld\n", udev->qfull_time_out <= 0 ? 
+ udev->qfull_time_out : + udev->qfull_time_out / MSEC_PER_SEC); +} + +static ssize_t tcmu_qfull_time_out_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = TCMU_DEV(da->da_dev); + s32 val; + int ret; + + ret = kstrtos32(page, 0, &val); + if (ret < 0) + return ret; + + if (val >= 0) { + udev->qfull_time_out = val * MSEC_PER_SEC; + } else { + printk(KERN_ERR "Invalid qfull timeout value %d\n", val); + return -EINVAL; + } + return count; +} +CONFIGFS_ATTR(tcmu_, qfull_time_out); + static ssize_t tcmu_dev_config_show(struct config_item *item, char *page) { struct se_dev_attrib *da = container_of(to_config_group(item), @@ -2038,6 +2118,7 @@ CONFIGFS_ATTR(tcmu_, emulate_write_cache); static struct configfs_attribute *tcmu_attrib_attrs[] = { &tcmu_attr_cmd_time_out, + &tcmu_attr_qfull_time_out, &tcmu_attr_dev_config, &tcmu_attr_dev_size, &tcmu_attr_emulate_write_cache, -- cgit v1.2.3 From 80eb876138a1adc7d30831ce275ea744c050d97e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:41 -0600 Subject: tcmu: allow max block and global max blocks to be settable Users might have a physical system to a target so they could have a lot more than 2 gigs of memory they want to devote to tcmu. OTOH, we could be running in a vm and so a 2 gig global and 1 gig per dev limit might be too high. This patch allows the user to specify the limits. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 143 +++++++++++++++++++++++++++++++++----- 1 file changed, 124 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c6a0c3198ccc..bac08bc72e3b 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -77,9 +77,13 @@ * the total size is 256K * PAGE_SIZE. */ #define DATA_BLOCK_SIZE PAGE_SIZE -#define DATA_BLOCK_BITS (256 * 1024) +#define DATA_BLOCK_SHIFT PAGE_SHIFT +#define DATA_BLOCK_BITS_DEF (256 * 1024) #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE) +#define TCMU_MBS_TO_BLOCKS(_mbs) (_mbs << (20 - DATA_BLOCK_SHIFT)) +#define TCMU_BLOCKS_TO_MBS(_blocks) (_blocks >> (20 - DATA_BLOCK_SHIFT)) + /* The total size of the ring is 8M + 256K * PAGE_SIZE */ #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) @@ -87,7 +91,7 @@ * Default number of global data blocks(512K * PAGE_SIZE) * when the unmap thread will be started. */ -#define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024) +#define TCMU_GLOBAL_MAX_BLOCKS_DEF (512 * 1024) static u8 tcmu_kern_cmd_reply_supported; @@ -131,13 +135,15 @@ struct tcmu_dev { /* Must add data_off and mb_addr to get the address */ size_t data_off; size_t data_size; + uint32_t max_blocks; + size_t ring_size; struct mutex cmdr_lock; struct list_head cmdr_queue; uint32_t dbi_max; uint32_t dbi_thresh; - DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS); + unsigned long *data_bitmap; struct radix_tree_root data_blocks; struct idr commands; @@ -198,10 +204,51 @@ static LIST_HEAD(root_udev); static DEFINE_SPINLOCK(timed_out_udevs_lock); static LIST_HEAD(timed_out_udevs); +static struct kmem_cache *tcmu_cmd_cache; + static atomic_t global_db_count = ATOMIC_INIT(0); static struct de