1 files changed, 785 insertions, 2379 deletions
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index d0368682bd43..7cedb4295e9d 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -1,19 +1,12 @@
-/* Copyright 2012 STEC, Inc.
+/*
+ * Driver for sTec s1120 PCIe SSDs. sTec was acquired in 2013 by HGST and HGST
+ * was acquired by Western Digital in 2012.
+ *
+ * Copyright 2012 sTec, Inc.
+ * Copyright (c) 2017 Western Digital Corporation or its affiliates.
  *
- * This file is licensed under the terms of the 3-clause
- * BSD License (http://opensource.org/licenses/BSD-3-Clause)
- * or the GNU GPL-2.0 (http://www.gnu.org/licenses/gpl-2.0.html),
- * at your option. Both licenses are also available in the LICENSE file
- * distributed with this project. This file may not be copied, modified,
- * or distributed except in accordance with those terms.
- * Gordoni Waidhofer <gwaidhofer@stec-inc.com>
- * Initial Driver Design!
- * Thomas Swann <tswann@stec-inc.com>
- * Interrupt handling.
- * Ramprasad Chinthekindi <rchinthekindi@stec-inc.com>
- * biomode implementation.
- * Akhil Bhansali <abhansali@stec-inc.com>
- * Added support for DISCARD / FLUSH and FUA.
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
  */
 
 #include <linux/kernel.h>
@@ -23,11 +16,11 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
 #include <linux/workqueue.h>
-#include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/time.h>
 #include <linux/hdreg.h>
@@ -37,9 +30,9 @@
 #include <linux/version.h>
 #include <linux/err.h>
 #include <linux/aer.h>
-#include <linux/ctype.h>
 #include <linux/wait.h>
-#include <linux/uio.h>
+#include <linux/stringify.h>
+#include <linux/slab_def.h>
 #include <scsi/scsi.h>
 #include <scsi/sg.h>
 #include <linux/io.h>
@@ -51,19 +44,6 @@
 static int skd_dbg_level;
 static int skd_isr_comp_limit = 4;
 
-enum {
-	STEC_LINK_2_5GTS = 0,
-	STEC_LINK_5GTS = 1,
-	STEC_LINK_8GTS = 2,
-	STEC_LINK_UNKNOWN = 0xFF
-};
-
-enum {
-	SKD_FLUSH_INITIALIZER,
-	SKD_FLUSH_ZERO_SIZE_FIRST,
-	SKD_FLUSH_DATA_SECOND,
-};
-
 #define SKD_ASSERT(expr) \
 	do { \
 		if (unlikely(!(expr))) { \
@@ -73,17 +53,11 @@ enum {
 	} while (0)
 
 #define DRV_NAME "skd"
-#define DRV_VERSION "2.2.1"
-#define DRV_BUILD_ID "0260"
 #define PFX DRV_NAME ": "
-#define DRV_BIN_VERSION 0x100
-#define DRV_VER_COMPL   "2.2.1." DRV_BUILD_ID
 
-MODULE_AUTHOR("bug-reports: support@stec-inc.com");
-MODULE_LICENSE("Dual BSD/GPL");
+MODULE_LICENSE("GPL");
 
-MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver (b" DRV_BUILD_ID ")");
-MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
+MODULE_DESCRIPTION("STEC s1120 PCIe SSD block driver");
 
 #define PCI_VENDOR_ID_STEC      0x1B39
 #define PCI_DEVICE_ID_S1120     0x0001
@@ -96,34 +70,32 @@ MODULE_VERSION(DRV_VERSION "-" DRV_BUILD_ID);
 #define SKD_PAUSE_TIMEOUT       (5 * 1000)
 
 #define SKD_N_FITMSG_BYTES      (512u)
+#define SKD_MAX_REQ_PER_MSG	14
 
-#define SKD_N_SPECIAL_CONTEXT   32u
 #define SKD_N_SPECIAL_FITMSG_BYTES      (128u)
 
 /* SG elements are 32 bytes, so we can make this 4096 and still be under the
  * 128KB limit.  That allows 4096*4K = 16M xfer size
  */
 #define SKD_N_SG_PER_REQ_DEFAULT 256u
-#define SKD_N_SG_PER_SPECIAL    256u
 
 #define SKD_N_COMPLETION_ENTRY  256u
 #define SKD_N_READ_CAP_BYTES    (8u)
 
 #define SKD_N_INTERNAL_BYTES    (512u)
 
+#define SKD_SKCOMP_SIZE							\
+	((sizeof(struct fit_completion_entry_v1) +			\
+	  sizeof(struct fit_comp_error_info)) * SKD_N_COMPLETION_ENTRY)
+
 /* 5 bits of uniqifier, 0xF800 */
-#define SKD_ID_INCR             (0x400)
 #define SKD_ID_TABLE_MASK       (3u << 8u)
 #define  SKD_ID_RW_REQUEST      (0u << 8u)
 #define  SKD_ID_INTERNAL        (1u << 8u)
-#define  SKD_ID_SPECIAL_REQUEST (2u << 8u)
 #define  SKD_ID_FIT_MSG         (3u << 8u)
 #define SKD_ID_SLOT_MASK        0x00FFu
 #define SKD_ID_SLOT_AND_TABLE_MASK 0x03FFu
 
-#define SKD_N_TIMEOUT_SLOT      4u
-#define SKD_TIMEOUT_SLOT_MASK   3u
-
 #define SKD_N_MAX_SECTORS 2048u
 
 #define SKD_MAX_RETRIES 2u
@@ -141,7 +113,6 @@ enum skd_drvr_state {
 	SKD_DRVR_STATE_ONLINE,
 	SKD_DRVR_STATE_PAUSING,
 	SKD_DRVR_STATE_PAUSED,
-	SKD_DRVR_STATE_DRAINING_TIMEOUT,
 	SKD_DRVR_STATE_RESTARTING,
 	SKD_DRVR_STATE_RESUMING,
 	SKD_DRVR_STATE_STOPPING,
@@ -158,7 +129,6 @@ enum skd_drvr_state {
 #define SKD_WAIT_BOOT_TIMO      SKD_TIMER_SECONDS(90u)
 #define SKD_STARTING_TIMO       SKD_TIMER_SECONDS(8u)
 #define SKD_RESTARTING_TIMO     SKD_TIMER_MINUTES(4u)
-#define SKD_DRAINING_TIMO       SKD_TIMER_SECONDS(6u)
 #define SKD_BUSY_TIMO           SKD_TIMER_MINUTES(20u)
 #define SKD_STARTED_BUSY_TIMO   SKD_TIMER_SECONDS(60u)
 #define SKD_START_WAIT_SECONDS  90u
@@ -169,12 +139,6 @@ enum skd_req_state {
 	SKD_REQ_STATE_BUSY,
 	SKD_REQ_STATE_COMPLETED,
 	SKD_REQ_STATE_TIMEOUT,
-	SKD_REQ_STATE_ABORTED,
-};
-
-enum skd_fit_msg_state {
-	SKD_MSG_STATE_IDLE,
-	SKD_MSG_STATE_BUSY,
 };
 
 enum skd_check_status_action {
@@ -185,34 +149,29 @@ enum skd_check_status_action {
 	SKD_CHECK_STATUS_BUSY_IMMINENT,
 };
 
-struct skd_fitmsg_context {
-	enum skd_fit_msg_state state;
-
-	struct skd_fitmsg_context *next;
+struct skd_msg_buf {
+	struct fit_msg_hdr	fmh;
+	struct skd_scsi_request	scsi[SKD_MAX_REQ_PER_MSG];
+};
 
+struct skd_fitmsg_context {
 	u32 id;
-	u16 outstanding;
 
 	u32 length;
-	u32 offset;
 
-	u8 *msg_buf;
+	struct skd_msg_buf *msg_buf;
 	dma_addr_t mb_dma_address;
 };
 
 struct skd_request_context {
 	enum skd_req_state state;
 
-	struct skd_request_context *next;
-
 	u16 id;
 	u32 fitmsg_id;
 
-	struct request *req;
 	u8 flush_cmd;
 
-	u32 timeout_stamp;
-	u8 sg_data_dir;
+	enum dma_data_direction data_dir;
 	struct scatterlist *sg;
 	u32 n_sg;
 	u32 sg_byte_count;
@@ -224,38 +183,19 @@ struct skd_request_context {
 
 	struct fit_comp_error_info err_info;
 
+	blk_status_t status;
 };
-#define SKD_DATA_DIR_HOST_TO_CARD       1
-#define SKD_DATA_DIR_CARD_TO_HOST       2
 
 struct skd_special_context {
 	struct skd_request_context req;
 
-	u8 orphaned;
-
 	void *data_buf;
 	dma_addr_t db_dma_address;
 
-	u8 *msg_buf;
+	struct skd_msg_buf *msg_buf;
 	dma_addr_t mb_dma_address;
 };
 
-struct skd_sg_io {
-	fmode_t mode;
-	void __user *argp;
-
-	struct sg_io_hdr sg;
-
-	u8 cdb[16];
-
-	u32 dxfer_len;
-	u32 iovcnt;
-	struct sg_iovec *iov;
-	struct sg_iovec no_iov_iov;
-
-	struct skd_special_context *skspcl;
-};
-
 typedef enum skd_irq_type {
 	SKD_IRQ_LEGACY,
 	SKD_IRQ_MSI,
@@ -265,7 +205,7 @@ typedef enum skd_irq_type {
 #define SKD_MAX_BARS                    2
 
 struct skd_device {
-	volatile void __iomem *mem_map[SKD_MAX_BARS];
+	void __iomem *mem_map[SKD_MAX_BARS];
 	resource_size_t mem_phys[SKD_MAX_BARS];
 	u32 mem_size[SKD_MAX_BARS];
 
@@ -276,21 +216,20 @@ struct skd_device {
 
 	spinlock_t lock;
 	struct gendisk *disk;
+	struct blk_mq_tag_set tag_set;
 	struct request_queue *queue;
+	struct skd_fitmsg_context *skmsg;
 	struct device *class_dev;
 	int gendisk_on;
 	int sync_done;
 
-	atomic_t device_count;
 	u32 devno;
 	u32 major;
-	char name[32];
 	char isr_name[30];
 
 	enum skd_drvr_state state;
 	u32 drive_state;
 
-	u32 in_flight;
 	u32 cur_max_queue_depth;
 	u32 queue_low_water_mark;
 	u32 dev_max_queue_depth;
@@ -298,27 +237,20 @@ struct skd_device {
 	u32 num_fitmsg_context;
 	u32 num_req_context;
 
-	u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
-	u32 timeout_stamp;
-	struct skd_fitmsg_context *skmsg_free_list;
 	struct skd_fitmsg_context *skmsg_table;
 
-	struct skd_request_context *skreq_free_list;
-	struct skd_request_context *skreq_table;
-
-	struct skd_special_context *skspcl_free_list;
-	struct skd_special_context *skspcl_table;
-
 	struct skd_special_context internal_skspcl;
 	u32 read_cap_blocksize;
 	u32 read_cap_last_lba;
 	int read_cap_is_valid;
 	int inquiry_is_valid;
 	u8 inq_serial_num[13];  /*12 chars plus null term */
-	u8 id_str[80];          /* holds a composite name (pci + sernum) */
 
 	u8 skcomp_cycle;
 	u32 skcomp_ix;
+	struct kmem_cache *msgbuf_cache;
+	struct kmem_cache *sglist_cache;
+	struct kmem_cache *databuf_cache;
 	struct fit_completion_entry_v1 *skcomp_table;
 	struct fit_comp_error_info *skerr_table;
 	dma_addr_t cq_dma_address;
@@ -329,7 +261,6 @@ struct skd_device {
 	u32 timer_countdown;
 	u32 timer_substate;
 
-	int n_special;
 	int sgs_per_request;
 	u32 last_mtd;
 
@@ -343,7 +274,7 @@ struct skd_device {
 
 	u32 timo_slot;
 
-
+	struct work_struct start_queue;
 	struct work_struct completion_worker;
 };
 
@@ -353,53 +284,32 @@ struct skd_device {
 
 static inline u32 skd_reg_read32(struct skd_device *skdev, u32 offset)
 {
-	u32 val;
-
-	if (likely(skdev->dbg_level < 2))
-		return readl(skdev->mem_map[1] + offset);
-	else {
-		barrier();
-		val = readl(skdev->mem_map[1] + offset);
-		barrier();
-		pr_debug("%s:%s:%d offset %x = %x\n",
-			 skdev->name, __func__, __LINE__, offset, val);
-		return val;
-	}
+	u32 val = readl(skdev->mem_map[1] + offset);
 
+	if (unlikely(skdev->dbg_level >= 2))
+		dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
+	return val;
 }
 
 static inline void skd_reg_write32(struct skd_device *skdev, u32 val,
 				   u32 offset)
 {
-	if (likely(skdev->dbg_level < 2)) {
-		writel(val, skdev->mem_map[1] + offset);
-		barrier();
-	} else {
-		barrier();
-		writel(val, skdev->mem_map[1] + offset);
-		barrier();
-		pr_debug("%s:%s:%d offset %x = %x\n",
-			 skdev->name, __func__, __LINE__, offset, val);
-	}
+	writel(val, skdev->mem_map[1] + offset);
+	if (unlikely(skdev->dbg_level >= 2))
+		dev_dbg(&skdev->pdev->dev, "offset %x = %x\n", offset, val);
 }
 
 static inline void skd_reg_write64(struct skd_device *skdev, u64 val,
 				   u32 offset)
 {
-	if (likely(skdev->dbg_level < 2)) {
-		writeq(val, skdev->mem_map[1] + offset);
-		barrier();
-	} else {
-		barrier();
-		writeq(val, skdev->mem_map[1] + offset);
-		barrier();
-		pr_debug("%s:%s:%d offset %x = %016llx\n",
-			 skdev->name, __func__, __LINE__, offset, val);
-	}
+	writeq(val, skdev->mem_map[1] + offset);
+	if (unlikely(skdev->dbg_level >= 2))
+		dev_dbg(&skdev->pdev->dev, "offset %x = %016llx\n", offset,
+			val);
 }
 
 
-#define SKD_IRQ_DEFAULT SKD_IRQ_MSI
+#define SKD_IRQ_DEFAULT SKD_IRQ_MSIX
 static int skd_isr_type = SKD_IRQ_DEFAULT;
 
 module_param(skd_isr_type, int, 0444);
@@ -412,7 +322,7 @@ static int skd_max_req_per_msg = SKD_MAX_REQ_PER_MSG_DEFAULT;
 module_param(skd_max_req_per_msg, int, 0444);
 MODULE_PARM_DESC(skd_max_req_per_msg,
 		 "Maximum SCSI requests packed in a single message."
-		 " (1-14, default==1)");
+		 " (1-" __stringify(SKD_MAX_REQ_PER_MSG) ", default==1)");
 
 #define SKD_MAX_QUEUE_DEPTH_DEFAULT 64
 #define SKD_MAX_QUEUE_DEPTH_DEFAULT_STR "64"
@@ -429,10 +339,10 @@ MODULE_PARM_DESC(skd_sgs_per_request,
 		 "Maximum SG elements per block request."
 		 " (1-4096, default==256)");
 
-static int skd_max_pass_thru = SKD_N_SPECIAL_CONTEXT;
+static int skd_max_pass_thru = 1;
 module_param(skd_max_pass_thru, int, 0444);
 MODULE_PARM_DESC(skd_max_pass_thru,
-		 "Maximum SCSI pass-thru at a time." " (1-50, default==32)");
+		 "Maximum SCSI pass-thru at a time. IGNORED");
 
 module_param(skd_dbg_level, int, 0444);
 MODULE_PARM_DESC(skd_dbg_level, "s1120 debug level (0,1,2)");
@@ -449,9 +359,6 @@ static void skd_send_fitmsg(struct skd_device *skdev,
 			    struct skd_fitmsg_context *skmsg);
 static void skd_send_special_fitmsg(struct skd_device *skdev,
 				    struct skd_special_context *skspcl);
-static void skd_request_fn(struct request_queue *rq);
-static void skd_end_request(struct skd_device *skdev,
-		struct skd_request_context *skreq, blk_status_t status);
 static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq);
 static void skd_postop_sg_list(struct skd_device *skdev,
@@ -460,19 +367,14 @@ static void skd_postop_sg_list(struct skd_device *skdev,
 static void skd_restart_device(struct skd_device *skdev);
 static int skd_quiesce_dev(struct skd_device *skdev);
 static int skd_unquiesce_dev(struct skd_device *skdev);
-static void skd_release_special(struct skd_device *skdev,
-				struct skd_special_context *skspcl);
 static void skd_disable_interrupts(struct skd_device *skdev);
 static void skd_isr_fwstate(struct skd_device *skdev);
-static void skd_recover_requests(struct skd_device *skdev, int requeue);
+static void skd_recover_requests(struct skd_device *skdev);
 static void skd_soft_reset(struct skd_device *skdev);
 
-static const char *skd_name(struct skd_device *skdev);
 const char *skd_drive_state_to_str(int state);
 const char *skd_skdev_state_to_str(enum skd_drvr_state state);
 static void skd_log_skdev(struct skd_device *skdev, const char *event);
-static void skd_log_skmsg(struct skd_device *skdev,
-			  struct skd_fitmsg_context *skmsg, const char *event);
 static void skd_log_skreq(struct skd_device *skdev,
 			  struct skd_request_context *skreq, const char *event);
 
@@ -481,18 +383,20 @@ static void skd_log_skreq(struct skd_device *skdev,
  * READ/WRITE REQUESTS
  *****************************************************************************
  */
-static void skd_fail_all_pending(struct skd_device *skdev)
+static void skd_inc_in_flight(struct request *rq, void *data, bool reserved)
 {
-	struct request_queue *q = skdev->queue;
-	struct request *req;
+	int *count = data;
 
-	for (;; ) {
-		req = blk_peek_request(q);
-		if (req == NULL)
-			break;
-		blk_start_request(req);
-		__blk_end_request_all(req, BLK_STS_IOERR);
-	}
+	count++;
+}
+
+static int skd_in_flight(struct skd_device *skdev)
+{
+	int count = 0;
+
+	blk_mq_tagset_busy_iter(&skdev->tag_set, skd_inc_in_flight, &count);
+
+	return count;
 }
 
 static void
@@ -501,9 +405,9 @@ skd_prep_rw_cdb(struct skd_scsi_request *scsi_req,
 		unsigned count)
 {
 	if (data_dir == READ)
-		scsi_req->cdb[0] = 0x28;
+		scsi_req->cdb[0] = READ_10;
 	else
-		scsi_req->cdb[0] = 0x2a;
+		scsi_req->cdb[0] = WRITE_10;
 
 	scsi_req->cdb[1] = 0;
 	scsi_req->cdb[2] = (lba & 0xff000000) >> 24;
@@ -522,7 +426,7 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
 {
 	skreq->flush_cmd = 1;
 
-	scsi_req->cdb[0] = 0x35;
+	scsi_req->cdb[0] = SYNCHRONIZE_CACHE;
 	scsi_req->cdb[1] = 0;
 	scsi_req->cdb[2] = 0;
 	scsi_req->cdb[3] = 0;
@@ -534,307 +438,194 @@ skd_prep_zerosize_flush_cdb(struct skd_scsi_request *scsi_req,
 	scsi_req->cdb[9] = 0;
 }
 
-static void skd_request_fn_not_online(struct request_queue *q);
-
-static void skd_request_fn(struct request_queue *q)
+/*
+ * Return true if and only if all pending requests should be failed.
+ */
+static bool skd_fail_all(struct request_queue *q)
 {
 	struct skd_device *skdev = q->queuedata;
-	struct skd_fitmsg_context *skmsg = NULL;
-	struct fit_msg_hdr *fmh = NULL;
-	struct skd_request_context *skreq;
-	struct request *req = NULL;
-	struct skd_scsi_request *scsi_req;
-	unsigned long io_flags;
-	u32 lba;
-	u32 count;
-	int data_dir;
-	u32 be_lba;
-	u32 be_count;
-	u64 be_dmaa;
-	u64 cmdctxt;
-	u32 timo_slot;
-	void *cmd_ptr;
-	int flush, fua;
-
-	if (skdev->state != SKD_DRVR_STATE_ONLINE) {
-		skd_request_fn_not_online(q);
-		return;
-	}
 
-	if (blk_queue_stopped(skdev->queue)) {
-		if (skdev->skmsg_free_list == NULL ||
-		    skdev->skreq_free_list == NULL ||
-		    skdev->in_flight >= skdev->queue_low_water_mark)
-			/* There is still some kind of shortage */
-			return;
-
-		queue_flag_clear(QUEUE_FLAG_STOPPED, skdev->queue);
-	}
+	SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
 
-	/*
-	 * Stop conditions:
-	 *  - There are no more native requests
-	 *  - There are already the maximum number of requests in progress
-	 *  - There are no more skd_request_context entries
-	 *  - There are no more FIT msg buffers
+	skd_log_skdev(skdev, "req_not_online");
+	switch (skdev->state) {
+	case SKD_DRVR_STATE_PAUSING:
+	case SKD_DRVR_STATE_PAUSED:
+	case SKD_DRVR_STATE_STARTING:
+	case SKD_DRVR_STATE_RESTARTING:
+	case SKD_DRVR_STATE_WAIT_BOOT:
+	/* In case of starting, we haven't started the queue,
+	 * so we can't get here... but requests are
+	 * possibly hanging out waiting for us because we
+	 * reported the dev/skd0 already.  They'll wait
+	 * forever if connect doesn't complete.
+	 * What to do??? delay dev/skd0 ??
 	 */
-	for (;; ) {
-
-		flush = fua = 0;
-
-		req = blk_peek_request(q);
-
-		/* Are there any native requests to start? */
-		if (req == NULL)
-			break;
-
-		lba = (u32)blk_rq_pos(req);
-		count = blk_rq_sectors(req);
-		data_dir = rq_data_dir(req);
-		io_flags = req->cmd_flags;
-
-		if (req_op(req) == REQ_OP_FLUSH)
-			flush++;
-
-		if (io_flags & REQ_FUA)
-			fua++;
-
-		pr_debug("%s:%s:%d new req=%p lba=%u(0x%x) "
-			 "count=%u(0x%x) dir=%d\n",
-			 skdev->name, __func__, __LINE__,
-			 req, lba, lba, count, count, data_dir);
-
-		/* At this point we know there is a request */
+	case SKD_DRVR_STATE_BUSY:
+	case SKD_DRVR_STATE_BUSY_IMMINENT:
+	case SKD_DRVR_STATE_BUSY_ERASE:
+		return false;
 
-		/* Are too many requets already in progress? */
-		if (skdev->in_flight >= skdev->cur_max_queue_depth) {
-			pr_debug("%s:%s:%d qdepth %d, limit %d\n",
-				 skdev->name, __func__, __LINE__,
-				 skdev->in_flight, skdev->cur_max_queue_depth);
-			break;
-		}
+	case SKD_DRVR_STATE_BUSY_SANITIZE:
+	case SKD_DRVR_STATE_STOPPING:
+	case SKD_DRVR_STATE_SYNCING:
+	case SKD_DRVR_STATE_FAULT:
+	case SKD_DRVR_STATE_DISAPPEARED:
+	default:
+		return true;
+	}
+}
 
-		/* Is a skd_request_context available? */
-		skreq = skdev->skreq_free_list;
-		if (skreq == NULL) {
-			pr_debug("%s:%s:%d Out of req=%p\n",
-				 skdev->name, __func__, __LINE__, q);
-			break;
-		}
-		SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
-		SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
-
-		/* Now we check to see if we can get a fit msg */
-		if (skmsg == NULL) {
-			if (skdev->skmsg_free_list == NULL) {
-				pr_debug("%s:%s:%d Out of msg\n",
-					 skdev->name, __func__, __LINE__);
-				break;
-			}
-		}
+static blk_status_t skd_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+				    const struct blk_mq_queue_data *mqd)
+{
+	struct request *const req = mqd->rq;
+	struct request_queue *const q = req->q;
+	struct skd_device *skdev = q->queuedata;
+	struct skd_fitmsg_context *skmsg;
+	struct fit_msg_hdr *fmh;
+	const u32 tag = blk_mq_unique_tag(req);
+	struct skd_request_context *const skreq = blk_mq_rq_to_pdu(req);
+	struct skd_scsi_request *scsi_req;
+	unsigned long flags = 0;
+	const u32 lba = blk_rq_pos(req);
+	const u32 count = blk_rq_sectors(req);
+	const int data_dir = rq_data_dir(req);
 
-		skreq->flush_cmd = 0;
-		skreq->n_sg = 0;
-		skreq->sg_byte_count = 0;
+	if (unlikely(skdev->state != SKD_DRVR_STATE_ONLINE))
+		return skd_fail_all(q) ? BLK_STS_IOERR : BLK_STS_RESOURCE;
 
-		/*
-		 * OK to now dequeue request from q.
-		 *
-		 * At this point we are comitted to either start or reject
-		 * the native request. Note that skd_request_context is
-		 * available but is still at the head of the free list.
-		 */
-		blk_start_request(req);
-		skreq->req = req;
-		skreq->fitmsg_id = 0;
-
-		/* Either a FIT msg is in progress or we have to start one. */
-		if (skmsg == NULL) {
-			/* Are there any FIT msg buffers available? */
-			skmsg = skdev->skmsg_free_list;
-			if (skmsg == NULL) {
-				pr_debug("%s:%s:%d Out of msg skdev=%p\n",
-					 skdev->name, __func__, __LINE__,
-					 skdev);
-				break;
-			}
-			SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
-			SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
+	blk_mq_start_request(req);
 
-			skdev->skmsg_free_list = skmsg->next;
+	WARN_ONCE(tag >= skd_max_queue_depth, "%#x > %#x (nr_requests = %lu)\n",
+		  tag, skd_max_queue_depth, q->nr_requests);
 
-			skmsg->state = SKD_MSG_STATE_BUSY;
-			skmsg->id += SKD_ID_INCR;
+	SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
 
-			/* Initialize the FIT msg header */
-			fmh = (struct fit_msg_hdr *)skmsg->msg_buf;
-			memset(fmh, 0, sizeof(*fmh));
-			fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
-			skmsg->length = sizeof(*fmh);
-		}
+	dev_dbg(&skdev->pdev->dev,
+		"new req=%p lba=%u(0x%x) count=%u(0x%x) dir=%d\n", req, lba,
+		lba, count, count, data_dir);
 
-		skreq->fitmsg_id = skmsg->id;
+	skreq->id = tag + SKD_ID_RW_REQUEST;
+	skreq->flush_cmd = 0;
+	skreq->n_sg = 0;
+	skreq->sg_byte_count = 0;
 
-		/*
-		 * Note that a FIT msg may have just been started
-		 * but contains no SoFIT requests yet.
-		 */
+	skreq->fitmsg_id = 0;
 
-		/*
-		 * Transcode the request, checking as we go. The outcome of
-		 * the transcoding is represented by the error variable.
-		 */
-		cmd_ptr = &skmsg->msg_buf[skmsg->length];
-		memset(cmd_ptr, 0, 32);
+	skreq->data_dir = data_dir == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 
-		be_lba = cpu_to_be32(lba);
-		be_count = cpu_to_be32(count);
-		be_dmaa = cpu_to_be64((u64)skreq->sksg_dma_address);
-		cmdctxt = skreq->id + SKD_ID_INCR;
+	if (req->bio && !skd_preop_sg_list(skdev, skreq)) {
+		dev_dbg(&skdev->pdev->dev, "error Out\n");
+		skreq->status = BLK_STS_RESOURCE;
+		blk_mq_complete_request(req);
+		return BLK_STS_OK;
+	}
 
-		scsi_req = cmd_ptr;
-		scsi_req->hdr.tag = cmdctxt;
-		scsi_req->hdr.sg_list_dma_address = be_dmaa;
+	dma_sync_single_for_device(&skdev->pdev->dev, skreq->sksg_dma_address,
+				   skreq->n_sg *
+				   sizeof(struct fit_sg_descriptor),
+				   DMA_TO_DEVICE);
 
-		if (data_dir == READ)
-			skreq->sg_data_dir = SKD_DATA_DIR_CARD_TO_HOST;
-		else
-			skreq->sg_data_dir = SKD_DATA_DIR_HOST_TO_CARD;
+	/* Either a FIT msg is in progress or we have to start one. */
+	if (skd_max_req_per_msg == 1) {
+		skmsg = NULL;
+	} else {
+		spin_lock_irqsave(&skdev->lock, flags);
+		skmsg = skdev->skmsg;
+	}
+	if (!skmsg) {
+		skmsg = &skdev->skmsg_table[tag];
+		skdev->skmsg = skmsg;
 
-		if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) {
-			skd_prep_zerosize_flush_cdb(scsi_req, skreq);
-			SKD_ASSERT(skreq->flush_cmd == 1);
+		/* Initialize the FIT msg header */
+		fmh = &skmsg->msg_buf->fmh;
+		memset(fmh, 0, sizeof(*fmh));
+		fmh->protocol_id = FIT_PROTOCOL_ID_SOFIT;
+		skmsg->length = sizeof(*fmh);
+	} else {
+		fmh = &skmsg->msg_buf->fmh;
+	}
 
-		} else {
-			skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
-		}
+	skreq->fitmsg_id = skmsg->id;
 
-		if (fua)
-			scsi_req->cdb[1] |= SKD_FUA_NV;
+	scsi_req = &skmsg->msg_buf->scsi[fmh->num_protocol_cmds_coalesced];
+	memset(scsi_req, 0, sizeof(*scsi_req));
 
-		if (!req->bio)
-			goto skip_sg;
+	scsi_req->hdr.tag = skreq->id;
+	scsi_req->hdr.sg_list_dma_address =
+		cpu_to_be64(skreq->sksg_dma_address);
 
-		if (!skd_preop_sg_list(skdev, skreq)) {
-			/*
-			 * Complete the native request with error.
-			 * Note that the request context is still at the
-			 * head of the free list, and that the SoFIT request
-			 * was encoded into the FIT msg buffer but the FIT
-			 * msg length has not been updated. In short, the
-			 * only resource that has been allocated but might
-			 * not be used is that the FIT msg could be empty.
-			 */
-			pr_debug("%s:%s:%d error Out\n",
-				 skdev->name, __func__, __LINE__);
-			skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
-			continue;
-		}
+	if (req_op(req) == REQ_OP_FLUSH) {
+		skd_prep_zerosize_flush_cdb(scsi_req, skreq);
+		SKD_ASSERT(skreq->flush_cmd == 1);
+	} else {
+		skd_prep_rw_cdb(scsi_req, data_dir, lba, count);
+	}
 
-skip_sg:
-		scsi_req->hdr.sg_list_len_bytes =
-			cpu_to_be32(skreq->sg_byte_count);
+	if (req->cmd_flags & REQ_FUA)
+		scsi_req->cdb[1] |= SKD_FUA_NV;
 
-		/* Complete resource allocations. */
-		skdev->skreq_free_list = skreq->next;
-		skreq->state = SKD_REQ_STATE_BUSY;
-		skreq->id += SKD_ID_INCR;
+	scsi_req->hdr.sg_list_len_bytes = cpu_to_be32(skreq->sg_byte_count);
 
-		skmsg->length += sizeof(struct skd_scsi_request);
-		fmh->num_protocol_cmds_coalesced++;
+	/* Complete resource allocations. */
+	skreq->state = SKD_REQ_STATE_BUSY;
 
-		/*
-		 * Update the active request counts.
-		 * Capture the timeout timestamp.
-		 */
-		skreq->timeout_stamp = skdev->timeout_stamp;
-		timo_slot = skreq->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
-		skdev->timeout_slot[timo_slot]++;
-		skdev->in_flight++;
-		pr_debug("%s:%s:%d req=0x%x busy=%d\n",
-			 skdev->name, __func__, __LINE__,
-			 skreq->id, skdev->in_flight);
+	skmsg->length += sizeof(struct skd_scsi_request);
+	fmh->num_protocol_cmds_coalesced++;
 
-		/*
-		 * If the FIT msg buffer is full send it.
-		 */
-		if (skmsg->length >= SKD_N_FITMSG_BYTES ||
-		    fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
-			skd_send_fitmsg(skdev, skmsg);
-			skmsg = NULL;
-			fmh = NULL;
-		}
-	}
+	dev_dbg(&skdev->pdev->dev, "req=0x%x busy=%d\n", skreq->id,
+		skd_in_flight(skdev));
 
 	/*
-	 * Is a FIT msg in progress? If it is empty put the buffer back
-	 * on the free list. If it is non-empty send what we got.
-	 * This minimizes latency when there are fewer requests than
-	 * what fits in a FIT msg.
+	 * If the FIT msg buffer is full send it.
 	 */
-	if (skmsg != NULL) {
-		/* Bigger than just a FIT msg header? */
-		if (skmsg->length > sizeof(struct fit_msg_hdr)) {
-			pr_debug("%s:%s:%d sending msg=%p, len %d\n",
-				 skdev->name, __func__, __LINE__,
-				 skmsg, skmsg->length);
+	if (skd_max_req_per_msg == 1) {
+		skd_send_fitmsg(skdev, skmsg);
+	} else {
+		if (mqd->last ||
+		    fmh->num_protocol_cmds_coalesced >= skd_max_req_per_msg) {
 			skd_send_fitmsg(skdev, skmsg);
-		} else {
-			/*
-			 * The FIT msg is empty. It means we got started
-			 * on the msg, but the requests were rejected.
-			 */
-			skmsg->state = SKD_MSG_STATE_IDLE;
-			skmsg->id += SKD_ID_INCR;
-			skmsg->next = skdev->skmsg_free_list;
-			skdev->skmsg_free_list = skmsg;
+			skdev->skmsg = NULL;
 		}
-		skmsg = NULL;
-		fmh = NULL;
+		spin_unlock_irqrestore(&skdev->lock, flags);
 	}
 
-	/*
-	 * If req is non-NULL it means there is something to do but
-	 * we are out of a resource.
-	 */
-	if (req)
-		blk_stop_queue(skdev->queue);
+	return BLK_STS_OK;
 }
 
-static void skd_end_request(struct skd_device *skdev,
-		struct skd_request_context *skreq, blk_status_t error)
+static enum blk_eh_timer_return skd_timed_out(struct request *req,
+					      bool reserved)
 {
-	if (unlikely(error)) {
-		struct request *req = skreq->req;
-		char *cmd = (rq_data_dir(req) == READ) ? "read" : "write";
-		u32 lba = (u32)blk_rq_pos(req);
-		u32 count = blk_rq_sectors(req);
-
-		pr_err("(%s): Error cmd=%s sect=%u count=%u id=0x%x\n",
-		       skd_name(skdev), cmd, lba, count, skreq->id);
-	} else
-		pr_debug("%s:%s:%d id=0x%x error=%d\n",
-			 skdev->name, __func__, __LINE__, skreq->id, error);
+	struct skd_device *skdev = req->q->queuedata;
+
+	dev_err(&skdev->pdev->dev, "request with tag %#x timed out\n",
+		blk_mq_unique_tag(req));
 
-	__blk_end_request_all(skreq->req, error);
+	return BLK_EH_RESET_TIMER;
+}
+
+static void skd_complete_rq(struct request *req)
+{
+	struct skd_request_context *skreq = blk_mq_rq_to_pdu(req);
+
+	blk_mq_end_request(req, skreq->status);
 }
 
 static bool skd_preop_sg_list(struct skd_device *skdev,
 			     struct skd_request_context *skreq)
 {
-	struct request *req = skreq->req;
-	int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
-	int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
-	struct scatterlist *sg = &skreq->sg[0];
+	struct request *req = blk_mq_rq_from_pdu(skreq);
+	struct scatterlist *sgl = &skreq->sg[0], *sg;
 	int n_sg;
 	int i;
 
 	skreq->sg_byte_count = 0;
 
-	/* SKD_ASSERT(skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD ||
-		   skreq->sg_data_dir == SKD_DATA_DIR_CARD_TO_HOST); */
+	WARN_ON_ONCE(skreq->data_dir != DMA_TO_DEVICE &&
+		     skreq->data_dir != DMA_FROM_DEVICE);
 
-	n_sg = blk_rq_map_sg(skdev->queue, req, sg);
+	n_sg = blk_rq_map_sg(skdev->queue, req, sgl);
 	if (n_sg <= 0)
 		return false;
 
@@ -842,7 +633,7 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
 	 * Map scatterlist to PCI bus addresses.
 	 * Note PCI might change the number of entries.
 	 */
-	n_sg = pci_map_sg(skdev->pdev, sg, n_sg, pci_dir);
+	n_sg = pci_map_sg(skdev->pdev, sgl, n_sg, skreq->data_dir);
 	if (n_sg <= 0)
 		return false;
 
@@ -850,10 +641,10 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
 
 	skreq->n_sg = n_sg;
 
-	for (i = 0; i < n_sg; i++) {
+	for_each_sg(sgl, sg, n_sg, i) {
 		struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
-		u32 cnt = sg_dma_len(&sg[i]);
-		uint64_t dma_addr = sg_dma_address(&sg[i]);
+		u32 cnt = sg_dma_len(sg);
+		uint64_t dma_addr = sg_dma_address(sg);
 
 		sgd->control = FIT_SGD_CONTROL_NOT_LAST;
 		sgd->byte_count = cnt;
@@ -866,16 +657,16 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
 	skreq->sksg_list[n_sg - 1].control = FIT_SGD_CONTROL_LAST;
 
 	if (unlikely(skdev->dbg_level > 1)) {
-		pr_debug("%s:%s:%d skreq=%x sksg_list=%p sksg_dma=%llx\n",
-			 skdev->name, __func__, __LINE__,
-			 skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
+		dev_dbg(&skdev->pdev->dev,
+			"skreq=%x sksg_list=%p sksg_dma=%llx\n",
+			skreq->id, skreq->sksg_list, skreq->sksg_dma_address);
 		for (i = 0; i < n_sg; i++) {
 			struct fit_sg_descriptor *sgd = &skreq->sksg_list[i];
-			pr_debug("%s:%s:%d   sg[%d] count=%u ctrl=0x%x "
-				 "addr=0x%llx next=0x%llx\n",
-				 skdev->name, __func__, __LINE__,
-				 i, sgd->byte_count, sgd->control,
-				 sgd->host_side_addr, sgd->next_desc_ptr);
+
+			dev_dbg(&skdev->pdev->dev,
+				"  sg[%d] count=%u ctrl=0x%x addr=0x%llx next=0x%llx\n",
+				i, sgd->byte_count, sgd->control,
+				sgd->host_side_addr, sgd->next_desc_ptr);
 		}
 	}
 
@@ -885,9 +676,6 @@ static bool skd_preop_sg_list(struct skd_device *skdev,
 static void skd_postop_sg_list(struct skd_device *skdev,
 			       struct skd_request_context *skreq)
 {
-	int writing = skreq->sg_data_dir == SKD_DATA_DIR_HOST_TO_CARD;
-	int pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;
-
 	/*
 	 * restore the next ptr for next IO request so we
 	 * don't have to set it every time.
@@ -895,51 +683,7 @@ static void skd_postop_sg_list(struct skd_device *skdev,
 	skreq->sksg_list[skreq->n_sg - 1].next_desc_ptr =
 		skreq->sksg_dma_address +
 		((skreq->n_sg) * sizeof(struct fit_sg_descriptor));
-	pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, pci_dir);
-}
-
-static void skd_request_fn_not_online(struct request_queue *q)
-{
-	struct skd_device *skdev = q->queuedata;
-	int error;
-
-	SKD_ASSERT(skdev->state != SKD_DRVR_STATE_ONLINE);
-
-	skd_log_skdev(skdev, "req_not_online");
-	switch (skdev->state) {
-	case SKD_DRVR_STATE_PAUSING:
-	case SKD_DRVR_STATE_PAUSED:
-	case SKD_DRVR_STATE_STARTING:
-	case SKD_DRVR_STATE_RESTARTING:
-	case SKD_DRVR_STATE_WAIT_BOOT:
-	/* In case of starting, we haven't started the queue,
-	 * so we can't get here... but requests are
-	 * possibly hanging out waiting for us because we
-	 * reported the dev/skd0 already.  They'll wait
-	 * forever if connect doesn't complete.
-	 * What to do??? delay dev/skd0 ??
-	 */
-	case SKD_DRVR_STATE_BUSY:
-	case SKD_DRVR_STATE_BUSY_IMMINENT:
-	case SKD_DRVR_STATE_BUSY_ERASE:
-	case SKD_DRVR_STATE_DRAINING_TIMEOUT:
-		return;
-
-	case SKD_DRVR_STATE_BUSY_SANITIZE:
-	case SKD_DRVR_STATE_STOPPING:
-	case SKD_DRVR_STATE_SYNCING:
-	case SKD_DRVR_STATE_FAULT:
-	case SKD_DRVR_STATE_DISAPPEARED:
-	default:
-		error = -EIO;
-		break;
-	}
-
-	/* If we get here, terminate all pending block requeusts
-	 * with EIO and any scsi pass thru with appropriate sense
-	 */
-
-	skd_fail_all_pending(skdev);
+	pci_unmap_sg(skdev->pdev, &skreq->sg[0], skreq->n_sg, skreq->data_dir);
 }
 
 /*
@@ -950,12 +694,22 @@ static void skd_request_fn_not_online(struct request_queue *q)
 
 static void skd_timer_tick_not_online(struct skd_device *skdev);
 
+static void skd_start_queue(struct work_struct *work)
+{
+	struct skd_device *skdev = container_of(work, typeof(*skdev),
+						start_queue);
+
+	/*
+	 * Although it is safe to call blk_start_queue() from interrupt
+	 * context, blk_mq_start_hw_queues() must not be called from
+	 * interrupt context.
+	 */
+	blk_mq_start_hw_queues(skdev->queue);
+}
+
 static void skd_timer_tick(ulong arg)
 {
 	struct skd_device *skdev = (struct skd_device *)arg;
-
-	u32 timo_slot;
-	u32 overdue_timestamp;
 	unsigned long reqflags;
 	u32 state;
 
@@ -972,37 +726,9 @@ static void skd_timer_tick(ulong arg)
 	if (state != skdev->drive_state)
 		skd_isr_fwstate(skdev);
 
-	if (skdev->state != SKD_DRVR_STATE_ONLINE) {
+	if (skdev->state != SKD_DRVR_STATE_ONLINE)
 		skd_timer_tick_not_online(skdev);
-		goto timer_func_out;
-	}
-	skdev->timeout_stamp++;
-	timo_slot = skdev->timeout_stamp & SKD_TIMEOUT_SLOT_MASK;
-
-	/*
-	 * All requests that happened during the previous use of
-	 * this slot should be done by now. The previous use was
-	 * over 7 seconds ago.
-	 */
-	if (skdev->timeout_slot[timo_slot] == 0)
-		goto timer_func_out;
-
-	/* Something is overdue */
-	overdue_timestamp = skdev->timeout_stamp - SKD_N_TIMEOUT_SLOT;
-
-	pr_debug("%s:%s:%d found %d timeouts, draining busy=%d\n",
-		 skdev->name, __func__, __LINE__,
-		 skdev->timeout_slot[timo_slot], skdev->in_flight);
-	pr_err("(%s): Overdue IOs (%d), busy %d\n",
-	       skd_name(skdev), skdev->timeout_slot[timo_slot],
-	       skdev->in_flight);
 
-	skdev->timer_countdown = SKD_DRAINING_TIMO;
-	skdev->state = SKD_DRVR_STATE_DRAINING_TIMEOUT;
-	skdev->timo_slot = timo_slot;
-	blk_stop_queue(skdev->queue);
-
-timer_func_out:
 	mod_timer(&skdev->timer, (jiffies + HZ));
 
 	spin_unlock_irqrestore(&skdev->lock, reqflags);
@@ -1015,9 +741,9 @@ static void skd_timer_tick_not_online(struct skd_device *skdev)
 	case SKD_DRVR_STATE_LOAD:
 		break;
 	case SKD_DRVR_STATE_BUSY_SANITIZE:
-		pr_debug("%s:%s:%d drive busy sanitize[%x], driver[%x]\n",
-			 skdev->name, __func__, __LINE__,
-			 skdev->drive_state, skdev->state);
+		dev_dbg(&skdev->pdev->dev,
+			"drive busy sanitize[%x], driver[%x]\n",
+			skdev->drive_state, skdev->state);
 		/* If we've been in sanitize for 3 seconds, we figu